|
import json |
|
|
|
import argparse |
|
|
|
import requests |
|
import os |
|
import numpy as np |
|
import json |
|
import datetime |
|
import logging |
|
|
|
# Configure root logging once at import time and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Timestamp captured when the module is loaded, formatted as
# YYYY_MM_DD_HH_MM_SS (not referenced elsewhere in the visible code).
today = f"{datetime.datetime.now():%Y_%m_%d_%H_%M_%S}"

# Default values used for the keyword arguments of `get_issues` and for the
# command-line options.
OWNER = "huggingface"
REPO = "transformers"
GITHUB_API_VERSION = "2022-11-28"
# Read from the environment; None when GITHUB_TOKEN is not set.
TOKEN = os.getenv("GITHUB_TOKEN")
JSON_FILE = "issues.json"
UPDATE_FILE = False
OVERWRITE_FILE = True
|
|
|
|
|
def get_last_entry(file_path):
    """Return the last JSON record stored in a JSON-lines file.

    The file is read as one JSON document per line. Blank lines (for example
    an extra trailing newline) are ignored, so the record returned is the one
    on the last non-blank line.

    Args:
        file_path: Path of the JSON-lines file to read.

    Returns:
        The decoded JSON value found on the last non-blank line.

    Raises:
        IndexError: If the file contains no non-blank line.
        json.JSONDecodeError: If the last non-blank line is not valid JSON.
    """
    last_line = None
    with open(file_path, "r", encoding="utf-8") as file:
        # Stream the file rather than materialising every line in memory:
        # only the final record is needed.
        for line in file:
            if line.strip():
                last_line = line

    if last_line is None:
        # Same exception type the previous `readlines()[-1]` lookup raised
        # for an empty file, now with an explicit message.
        raise IndexError(f"No entries found in {file_path}")

    return json.loads(last_line)
|
|
|
|
|
def get_last_issue_number(file_path):
    """Return the issue number of the last record saved in ``file_path``.

    Args:
        file_path: Path of the JSON-lines file holding previously saved
            issues.

    Returns:
        The ``"number"`` field of the last saved record, or 0 when the file
        does not exist or is empty (i.e. nothing has been saved yet).
    """
    # A missing or zero-byte file both mean there is no record to resume from.
    if not os.path.exists(file_path) or os.path.getsize(file_path) == 0:
        return 0

    last_entry = get_last_entry(file_path=file_path)
    return last_entry["number"]
|
|
|
|
|
def get_issues(
    overwrite=OVERWRITE_FILE,
    update=UPDATE_FILE,
    output_filename=JSON_FILE,
    github_api_version=GITHUB_API_VERSION,
    owner=OWNER,
    repo=REPO,
    token=TOKEN,
    n_pages=-1,
):
    """Download the issues of a GitHub repository into a JSON-lines file.

    Issues are requested page by page (100 per page, all states, sorted by
    creation date ascending) and each issue is appended to
    ``output_filename`` as one JSON document per line. When the output file
    already holds records, fetching resumes after the last saved issue
    number.

    Args:
        overwrite: If true, delete an existing ``output_filename`` and start
            from scratch. Takes precedence over ``update``.
        update: If true, append to an existing ``output_filename``.
        output_filename: Path of the JSON-lines file to write.
        github_api_version: Value sent in the ``X-GitHub-Api-Version`` header.
        owner: Owner of the repository.
        repo: Name of the repository.
        token: GitHub token. Sent as ``Bearer <token>`` unless it already
            starts with an authorization scheme (``Bearer`` / ``token``).
            When empty or ``None`` no ``Authorization`` header is sent.
        n_pages: Maximum number of pages to fetch in this call; any value
            ``<= 0`` means no limit.

    Returns:
        ``output_filename``.

    Raises:
        ValueError: If ``output_filename`` exists and neither ``overwrite``
            nor ``update`` is set, or if a request returns a non-200 status.
        requests.exceptions.RequestException: If a request fails at the
            network level (including hitting the request timeout).
    """
    # Settle what to do with an existing file before reading anything from it.
    if os.path.exists(output_filename):
        if overwrite:
            logger.info(f"Deleting file {output_filename}")
            os.remove(output_filename)
        elif not update:
            raise ValueError(f"File {output_filename} already exists")

    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": f"{github_api_version}",
        "User-Agent": "amyeroberts",
    }
    if token:
        # Only authenticate when a token is available; sending the literal
        # string "None" (or a token without a scheme) is not a valid
        # Authorization header value.
        token_text = f"{token}"
        has_scheme = token_text.lower().startswith(("bearer ", "token "))
        headers["Authorization"] = token_text if has_scheme else f"Bearer {token_text}"

    last_issue_number = get_last_issue_number(file_path=output_filename)
    per_page = 100
    # NOTE(review): the resume page is derived from the last saved issue
    # number, which assumes issue numbers are contiguous. If numbers are
    # missing in the listing this may start too far ahead - confirm.
    page = last_issue_number // per_page + 1
    query_params = {
        "state": "all",
        "per_page": per_page,
        "sort": "created",
        "direction": "asc",
        "page": page,
    }

    # `page_limit` is exclusive: at most `n_pages` pages are fetched.
    page_limit = (n_pages + page) if n_pages > 0 else float("inf")

    while page < page_limit:
        query_params["page"] = page
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(url, headers=headers, params=query_params, timeout=60)

        if response.status_code != 200:
            raise ValueError(
                f"Request failed with status code {response.status_code} and message {response.text}"
            )

        json_response = response.json()
        logger.info(f"Page: {page}, number of issues: {len(json_response)}")

        if len(json_response) == 0:
            break

        with open(output_filename, "a", encoding="utf-8") as f:
            for value in json_response:
                # The first page fetched when resuming can contain issues
                # that are already in the file; skip them.
                if value["number"] <= last_issue_number:
                    continue
                json.dump(value, f)
                f.write("\n")

        # A short page is the last page.
        if len(json_response) < per_page:
            break

        page += 1

    return output_filename
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: every option maps one-to-one onto a keyword
    # argument of `get_issues`.
    parser = argparse.ArgumentParser(
        description="Download the issues of a GitHub repository into a JSON-lines file."
    )
    # `--update` defaults to True, so on its own it could never be switched
    # off; `--no-update` writes False to the same destination.
    parser.add_argument(
        "--update",
        dest="update",
        action="store_true",
        default=True,
        help="Append to an existing output file (default).",
    )
    parser.add_argument(
        "--no-update",
        dest="update",
        action="store_false",
        help="Fail if the output file already exists and --overwrite is not given.",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        default=False,
        help="Delete an existing output file and start from scratch.",
    )
    parser.add_argument("--output_filename", type=str, default=JSON_FILE, help="Path of the output file.")
    parser.add_argument(
        "--github_api_version",
        type=str,
        default=GITHUB_API_VERSION,
        help="Value of the X-GitHub-Api-Version header.",
    )
    parser.add_argument("--owner", type=str, default=OWNER, help="Repository owner.")
    parser.add_argument("--repo", type=str, default=REPO, help="Repository name.")
    parser.add_argument(
        "--token",
        type=str,
        default=TOKEN,
        help="GitHub token (defaults to the GITHUB_TOKEN environment variable).",
    )
    parser.add_argument(
        "--n_pages",
        type=int,
        default=-1,
        help="Maximum number of pages to fetch; values <= 0 mean no limit.",
    )
    args = parser.parse_args()

    get_issues(**vars(args))
|
|