Spaces:
Running
Running
File size: 5,484 Bytes
6ae6e49 5245583 6ae6e49 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
#!/usr/bin/env python3
# Copied from https://github.com/rerun-io/rerun_template
"""
Summarizes recent PRs based on their GitHub labels.
The result can be copy-pasted into CHANGELOG.md,
though it often needs some manual editing too.
"""
from __future__ import annotations
import argparse
import multiprocessing
import os
import re
import sys
from dataclasses import dataclass
from typing import Any, Optional
import requests
from git import Repo # pip install GitPython
from tqdm import tqdm
# GitHub coordinates of the repository whose pull requests are summarized.
OWNER = "rerun-io"
REPO = "rerun_example_huggingface"

# Appending the PR labels to every entry adds quite a bit of visual noise.
INCLUDE_LABELS = False

# GitHub logins that are treated as official Rerun developers.
OFFICIAL_RERUN_DEVS = [
    "abey79",
    "emilk",
    "jleibs",
    "jprochazk",
    "nikolausWest",
    "teh-cmc",
    "Wumpf",
]
@dataclass
class PrInfo:
    """The pieces of a GitHub pull request that the changelog needs."""

    # Login name of the GitHub user who authored the pull request.
    gh_user_name: str

    # Title of the pull request.
    pr_title: str

    # Names of the labels attached to the pull request.
    labels: list[str]
@dataclass
class CommitInfo:
    """A git commit together with the pull request number parsed from its summary."""

    # Full hexadecimal SHA of the commit.
    hexsha: str

    # Commit summary, without the trailing " (#123)" part when one was found.
    title: str

    # Pull request number referenced by the summary, or `None` if there is none.
    pr_number: Optional[int]
def get_github_token() -> str:
    """
    Return the GitHub access token used to authenticate API requests.

    The `GH_ACCESS_TOKEN` environment variable is consulted first. If it is
    unset or empty, the whitespace-stripped contents of `~/.githubtoken` are
    used instead. If neither source yields a non-empty token, an error message
    is printed and the process exits with status 1.
    """
    token = os.environ.get("GH_ACCESS_TOKEN", "")
    if token:
        return token

    token_file = os.path.join(os.path.expanduser("~"), ".githubtoken")
    try:
        with open(token_file, encoding="utf8") as f:
            token = f.read().strip()
    except (OSError, UnicodeDecodeError):
        # A missing or unreadable token file is reported by the error below.
        token = ""

    # An empty token file is as useless as a missing one: sending an empty
    # token would only surface later as an authentication failure.
    if token:
        return token

    print("ERROR: expected a GitHub token in the environment variable GH_ACCESS_TOKEN or in ~/.githubtoken")
    sys.exit(1)
# Slow
def fetch_pr_info_from_commit_info(commit_info: CommitInfo) -> Optional[PrInfo]:
    """Fetch the PR info for a commit, or return `None` when the commit has no PR number."""
    pr_number = commit_info.pr_number
    return None if pr_number is None else fetch_pr_info(pr_number)
# Slow
def fetch_pr_info(pr_number: int) -> Optional[PrInfo]:
    """
    Fetch title, author and labels of a pull request from the GitHub REST API.

    Returns a `PrInfo` on success. On any failure (network error, non-200
    status, or a body that is not a JSON object) an `ERROR` line is printed
    and `None` is returned.
    """
    url = f"https://api.github.com/repos/{OWNER}/{REPO}/pulls/{pr_number}"
    headers = {"Authorization": f"Token {get_github_token()}"}

    try:
        # Without a timeout a stalled connection would block this worker forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as err:
        print(f"ERROR {url}: {err}")
        return None

    # Error responses (e.g. from a proxy or gateway) are not guaranteed to be JSON.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    # Check if the request was successful (status code 200)
    if response.status_code == 200 and isinstance(payload, dict):
        labels = [label["name"] for label in payload["labels"]]
        gh_user_name = payload["user"]["login"]
        return PrInfo(gh_user_name=gh_user_name, pr_title=payload["title"], labels=labels)

    # Prefer the API's own error message; fall back to the HTTP reason phrase.
    message = payload.get("message") if isinstance(payload, dict) else None
    if message is None:
        message = response.reason
    print(f"ERROR {url}: {response.status_code} - {message}")
    return None
def get_commit_info(commit: Any) -> CommitInfo:
    """
    Build a `CommitInfo` from a commit object exposing `summary` and `hexsha`.

    A summary of the form "<title> (#<number>)" is split into the title and the
    PR number; any other summary is kept whole and gets no PR number.
    """
    pr_match = re.match(r"(.*) \(#(\d+)\)", commit.summary)
    if pr_match is None:
        return CommitInfo(hexsha=commit.hexsha, title=commit.summary, pr_number=None)

    return CommitInfo(
        hexsha=commit.hexsha,
        title=str(pr_match.group(1)),
        pr_number=int(pr_match.group(2)),
    )
def remove_prefix(text: str, prefix: str) -> str:
    """Return `text` without a leading `prefix`; `text` is returned unchanged if it lacks the prefix."""
    return text[len(prefix) :] if text.startswith(prefix) else text
def print_section(crate: str, items: list[str]) -> None:
    """
    Print `items` as a Markdown bullet list under a `#### crate` heading.

    The heading and bullets are skipped when `items` is empty; a blank line is
    printed afterwards either way.
    """
    if items:
        bullets = [f"* {line}" for line in items]
        print("\n".join([f"#### {crate}", *bullets]))
    print()
def main() -> None:
    """
    Print a Markdown changelog summary for the commits in `--commit-range`.

    Each commit in the range is looked up in the git repository in the current
    directory. Commits whose summary references a pull request are listed under
    "PRs" (using the PR title fetched from GitHub when available), all others
    under "Unsorted commits". PRs labeled "exclude from changelog" or "typo"
    are left out.
    """
    parser = argparse.ArgumentParser(description="Generate a changelog.")
    parser.add_argument("--commit-range", help="e.g. 0.1.0..HEAD", required=True)
    args = parser.parse_args()

    repo = Repo(".")
    commits = list(repo.iter_commits(args.commit_range))
    commits.reverse()  # Most recent last
    commit_infos = list(map(get_commit_info, commits))

    # The `with` block shuts the worker processes down once every result has
    # been collected, instead of leaving the pool open until interpreter exit.
    with multiprocessing.Pool() as pool:
        pr_infos = list(
            tqdm(
                pool.imap(fetch_pr_info_from_commit_info, commit_infos),
                total=len(commit_infos),
                desc="Fetch PR info commits",
            )
        )

    prs = []
    unsorted_commits = []

    for commit_info, pr_info in zip(commit_infos, pr_infos):
        hexsha = commit_info.hexsha
        pr_number = commit_info.pr_number

        if pr_number is None:
            # Someone committed straight to main:
            title = commit_info.title.rstrip(".").strip()
            summary = f"{title} [{hexsha[:7]}](https://github.com/{OWNER}/{REPO}/commit/{hexsha})"
            unsorted_commits.append(summary)
            continue

        # We prefer the PR title if available
        title = pr_info.pr_title if pr_info else commit_info.title
        # Some PR end with an unnecessary period. This is done after the title
        # has been chosen so that it applies to the PR title as well.
        title = title.rstrip(".").strip()
        labels = pr_info.labels if pr_info else []

        if "exclude from changelog" in labels:
            continue
        if "typo" in labels:
            # We get so many typo PRs. Let's not flood the changelog with them.
            continue

        summary = f"{title} [#{pr_number}](https://github.com/{OWNER}/{REPO}/pull/{pr_number})"

        if INCLUDE_LABELS and labels:
            summary += f" ({', '.join(labels)})"

        if pr_info is not None:
            gh_user_name = pr_info.gh_user_name
            if gh_user_name not in OFFICIAL_RERUN_DEVS:
                summary += f" (thanks [@{gh_user_name}](https://github.com/{gh_user_name})!)"

        prs.append(summary)

    # Clean up: upper-case the first letter (slicing keeps this safe for empty strings)
    prs = [line[:1].upper() + line[1:] for line in prs]

    print()
    print(f"Full diff at https://github.com/{OWNER}/{REPO}/compare/{args.commit_range}")
    print()
    print_section("PRs", prs)
    print_section("Unsorted commits", unsorted_commits)


if __name__ == "__main__":
    main()
|