preview_dataset / scripts /generate_changelog.py
Emil Ernerfeldt
Rename example to rerun_example_huggingface
5245583
raw
history blame
5.48 kB
#!/usr/bin/env python3
# Copied from https://github.com/rerun-io/rerun_template
"""
Summarizes recent PRs based on their GitHub labels.
The result can be copy-pasted into CHANGELOG.md,
though it often needs some manual editing too.
"""
from __future__ import annotations
import argparse
import multiprocessing
import os
import re
import sys
from dataclasses import dataclass
from typing import Any, Optional
import requests
from git import Repo # pip install GitPython
from tqdm import tqdm
OWNER = "rerun-io"
REPO = "rerun_example_huggingface"
INCLUDE_LABELS = False # It adds quite a bit of visual noise
OFFICIAL_RERUN_DEVS = [
"abey79",
"emilk",
"jleibs",
"jprochazk",
"nikolausWest",
"teh-cmc",
"Wumpf",
]
@dataclass
class PrInfo:
gh_user_name: str
pr_title: str
labels: list[str]
@dataclass
class CommitInfo:
hexsha: str
title: str
pr_number: Optional[int]
def get_github_token() -> str:
token = os.environ.get("GH_ACCESS_TOKEN", "")
if token != "":
return token
home_dir = os.path.expanduser("~")
token_file = os.path.join(home_dir, ".githubtoken")
try:
with open(token_file, encoding="utf8") as f:
token = f.read().strip()
return token
except Exception:
pass
print("ERROR: expected a GitHub token in the environment variable GH_ACCESS_TOKEN or in ~/.githubtoken")
sys.exit(1)
# Slow
def fetch_pr_info_from_commit_info(commit_info: CommitInfo) -> Optional[PrInfo]:
if commit_info.pr_number is None:
return None
else:
return fetch_pr_info(commit_info.pr_number)
# Slow
def fetch_pr_info(pr_number: int) -> Optional[PrInfo]:
url = f"https://api.github.com/repos/{OWNER}/{REPO}/pulls/{pr_number}"
gh_access_token = get_github_token()
headers = {"Authorization": f"Token {gh_access_token}"}
response = requests.get(url, headers=headers)
json = response.json()
# Check if the request was successful (status code 200)
if response.status_code == 200:
labels = [label["name"] for label in json["labels"]]
gh_user_name = json["user"]["login"]
return PrInfo(gh_user_name=gh_user_name, pr_title=json["title"], labels=labels)
else:
print(f"ERROR {url}: {response.status_code} - {json['message']}")
return None
def get_commit_info(commit: Any) -> CommitInfo:
match = re.match(r"(.*) \(#(\d+)\)", commit.summary)
if match:
title = str(match.group(1))
pr_number = int(match.group(2))
return CommitInfo(hexsha=commit.hexsha, title=title, pr_number=pr_number)
else:
return CommitInfo(hexsha=commit.hexsha, title=commit.summary, pr_number=None)
def remove_prefix(text: str, prefix: str) -> str:
if text.startswith(prefix):
return text[len(prefix) :]
return text # or whatever
def print_section(crate: str, items: list[str]) -> None:
if 0 < len(items):
print(f"#### {crate}")
for line in items:
print(f"* {line}")
print()
def main() -> None:
parser = argparse.ArgumentParser(description="Generate a changelog.")
parser.add_argument("--commit-range", help="e.g. 0.1.0..HEAD", required=True)
args = parser.parse_args()
repo = Repo(".")
commits = list(repo.iter_commits(args.commit_range))
commits.reverse() # Most recent last
commit_infos = list(map(get_commit_info, commits))
pool = multiprocessing.Pool()
pr_infos = list(
tqdm(
pool.imap(fetch_pr_info_from_commit_info, commit_infos),
total=len(commit_infos),
desc="Fetch PR info commits",
)
)
prs = []
unsorted_commits = []
for commit_info, pr_info in zip(commit_infos, pr_infos):
hexsha = commit_info.hexsha
title = commit_info.title
title = title.rstrip(".").strip() # Some PR end with an unnecessary period
pr_number = commit_info.pr_number
if pr_number is None:
# Someone committed straight to main:
summary = f"{title} [{hexsha[:7]}](https://github.com/{OWNER}/{REPO}/commit/{hexsha})"
unsorted_commits.append(summary)
else:
# We prefer the PR title if available
title = pr_info.pr_title if pr_info else title
labels = pr_info.labels if pr_info else []
if "exclude from changelog" in labels:
continue
if "typo" in labels:
# We get so many typo PRs. Let's not flood the changelog with them.
continue
summary = f"{title} [#{pr_number}](https://github.com/{OWNER}/{REPO}/pull/{pr_number})"
if INCLUDE_LABELS and 0 < len(labels):
summary += f" ({', '.join(labels)})"
if pr_info is not None:
gh_user_name = pr_info.gh_user_name
if gh_user_name not in OFFICIAL_RERUN_DEVS:
summary += f" (thanks [@{gh_user_name}](https://github.com/{gh_user_name})!)"
prs.append(summary)
# Clean up:
for i in range(len(prs)):
line = prs[i]
line = line[0].upper() + line[1:] # Upper-case first letter
prs[i] = line
print()
print(f"Full diff at https://github.com/rerun-io/{REPO}/compare/{args.commit_range}")
print()
print_section("PRs", prs)
print_section("Unsorted commits", unsorted_commits)
if __name__ == "__main__":
main()