""" Process and classify ICLR submissions using OpenReview API. This script processes ICLR submissions, classifies them into subdirectories based on decisions, extracts paper content into JSON format, and checks the validity of the processed papers. It includes three main functions: - classify_ICLR_submissions_into_subdirectories: Classifies papers into directories based on decisions. - process_submission: Processes each submission by extracting text and saving it as a JSON file. - check_processed_paper: Verifies if all processed papers are valid JSON files. """ import os import sys import traceback from collections import Counter from tqdm import tqdm sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from agentreview.arguments import parse_args from agentreview.utility.utils import print_colored decision_map = { # ICLR 2023 "Reject": "Reject", "Accept: poster": "Accept-poster", "Accept: notable-top-25%": "Accept-notable-top-25", "Accept: notable-top-5%": "Accept-notable-top-5", # ICLR 2022 "Accept (Poster)": "Accept-poster", "Accept (Oral)": "Accept-oral", "Accept (Spotlight)": "Accept-spotlight", # ICLR 2021 "Significant concerns (Do not publish)": "Significant-concerns", "Concerns raised (can publish with adjustment)": "Concerns-raised", # ICLR 2020 "Accept (Talk)": "Accept-oral", # We assume this signifies an oral presentation # ICLR 2018 "Invite to Workshop Track": "Reject" } def categorize_ICLR_submissions_into_subdirectories(): """Classifies ICLR submissions into subdirectories based on review decisions. This function iterates through the review notes and identifies the decision (recommendation or final decision) for each submission. It then moves the notes and their corresponding papers into directories based on the decision. Raises: AssertionError: If the line containing the decision does not have the expected format. """ note_dir = f"data/{args.conference}/notes" paper_dir = f"data/{args.conference}/paper" for note in os.listdir(note_dir): print(note) # Skip directories or irrelevant files if os.path.isdir(os.path.join(note_dir, note)) or ".DS_Store" in note: continue note_path = os.path.join(note_dir, note) lines = open(note_path, "r").readlines() decision = None for line in tqdm(lines): if "\"recommendation\"" in line: assert Counter(line)["\""] == 4, "Unexpected format in recommendation line." print(line) decision = line.split("\"recommendation\"")[1].split("\"")[1] break elif "\"decision\"" in line: assert Counter(line)["\""] == 4, "Unexpected format in decision line." print(line) try: decision = line.split("\"decision\"")[1].split("\"")[1] break except Exception: traceback.print_exc() print_colored(line, 'red') if decision is None: # Possibly withdrawn papers print_colored(f"Could not find decision for {note}", "red") continue os.makedirs(os.path.join(note_dir, decision_map[decision]), exist_ok=True) os.makedirs(os.path.join(paper_dir, decision_map[decision]), exist_ok=True) os.rename(note_path, os.path.join(note_dir, decision_map[decision], note)) paper_id = int(note.split(".json")[0]) paper_path = os.path.join(paper_dir, f"{paper_id}.pdf") os.rename(paper_path, os.path.join(paper_dir, decision_map[decision], f"{paper_id}.pdf")) if __name__ == "__main__": args = parse_args() # Extract contents of each paper into a JSON file categorize_ICLR_submissions_into_subdirectories()