import json
import os
import time
import html

import dotenv

from summarize_paper import summarize_paper
from fetch_data import fetch_paper_data_with_category
from post_blog import post_blog
from send_mail import send_email

dotenv.load_dotenv()
ACCESS_KEY = os.getenv("ACCESS_KEY")


def fix_text(text: str) -> str:
    """Repair common mojibake: UTF-8 bytes that were mis-decoded as Latin-1.

    Each whitespace-separated word is re-encoded as Latin-1 and decoded as
    UTF-8; words for which that round-trip fails are kept unchanged.

    NOTE: unlike an earlier revision, this function does NOT HTML-escape.
    Escaping happens exactly once, at post time in ``paper_data`` — escaping
    here caused double/triple-escaped entities (``&amp;amp;``) and corrupted
    the Latin-1 round-trip by feeding it entity text.

    Args:
        text: Possibly mojibake'd input string.

    Returns:
        The repaired string with runs of whitespace collapsed to single
        spaces (a side effect of the per-word processing).
    """
    # "â¦" is the Latin-1 rendering of a UTF-8 horizontal ellipsis;
    # normalize it before the word-by-word repair.
    text = text.replace("â¦", "..., ")
    repaired = []
    for word in text.split():
        try:
            repaired.append(word.encode("latin1").decode("utf-8"))
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Word is already clean (or not Latin-1 representable) — keep it.
            repaired.append(word)
    return " ".join(repaired)


def paper_data(paper_data_json: str, wait_time: int = 5) -> str:
    """Summarize and post every paper described in *paper_data_json*.

    For each paper: retry summarization up to three times (sleeping three
    minutes between attempts), then post the blog entry and record the
    outcome. Papers with missing details or failed summaries are skipped.

    Args:
        paper_data_json: JSON string mapping category -> {paper_id -> details},
            where details holds at least ``doi``, ``pdf_url``, ``title`` and
            ``citation``.
        wait_time: Seconds forwarded to ``post_blog`` between requests.

    Returns:
        A pretty-printed JSON string: ``{"status": ..., "data": {category:
        {paper_id: record}}}``.
    """
    result_data = {"status": "success", "data": {}}
    papers_by_category = json.loads(paper_data_json)

    for category, papers in papers_by_category.items():
        print(f"Processing category: {category}")
        result_data["data"][category] = {}

        for paper_id, details in papers.items():
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            title = details.get("title") or ""
            citation = details.get("citation")

            if not all([paper_id, doi, pdf_url, title, citation]):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue

            # Retry summarization: both a raised exception and a falsy
            # summary/mindmap count as a failed attempt.
            summary, mindmap = None, None
            max_retries = 3
            retry_count = 0
            while (not summary or not mindmap) and retry_count < max_retries:
                try:
                    summary, mindmap = summarize_paper(pdf_url, paper_id, ACCESS_KEY)
                    if summary and mindmap:
                        break
                except Exception as e:
                    print(f"Error summarizing paper {paper_id}: {e}")
                retry_count += 1
                if retry_count < max_retries:
                    print(f"Retrying paper {paper_id} in 3 minutes")
                    time.sleep(3 * 60)

            if not summary or not mindmap:
                print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
                continue

            try:
                # HTML-escape exactly once, after mojibake repair.  An
                # earlier revision escaped titles up to three times,
                # producing "&amp;amp;"-style artifacts in posted blogs.
                fixed_title = html.escape(fix_text(title).strip())
                fixed_citation = html.escape(fix_text(citation).strip())
                fixed_summary = html.escape(str(summary).strip())
                fixed_mindmap = html.escape(str(mindmap).strip())
                post_status = post_blog(
                    doi,
                    fixed_title,
                    category,
                    fixed_summary,
                    fixed_mindmap,
                    fixed_citation,
                    ACCESS_KEY,
                    wait_time,
                )
            except Exception as e:
                print(f"Error posting blog '{title}': {e}")
                continue

            result_data["data"][category][paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": fixed_title,
                "category": category,
                "posted": post_status,
                "citation": fixed_citation,
                "summary": fixed_summary,
                "mindmap": fixed_mindmap,
            }

    return json.dumps(result_data, indent=4, ensure_ascii=False)


def post_blogpost(uaccess_key: str, wait_time: int = 5) -> str:
    """Fetch papers, process/post them all, and e-mail the result report.

    Args:
        uaccess_key: Caller-supplied key; must equal ``ACCESS_KEY``.
        wait_time: Seconds forwarded to ``post_blog`` between requests.

    Returns:
        The JSON report from ``paper_data``; returns ``False`` (not a str)
        when the access key does not match — callers rely on this falsy
        sentinel, so it is kept for backward compatibility.
    """
    if uaccess_key != ACCESS_KEY:
        return False

    data = fetch_paper_data_with_category(uaccess_key)
    processed_data = paper_data(data, wait_time)

    try:
        send_email(processed_data)
        print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
    except Exception as e:
        # Mail failure is non-fatal: the processed report is still returned.
        print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
    finally:
        print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
    return processed_data


def test(uaccess_key: str) -> str:
    """Run the pipeline against one hard-coded paper for a smoke test.

    Args:
        uaccess_key: Caller-supplied key; must equal ``ACCESS_KEY``.

    Returns:
        The JSON report from ``paper_data``, or ``False`` on a bad key
        (same falsy sentinel as ``post_blogpost``).
    """
    if uaccess_key != ACCESS_KEY:
        return False

    # NOTE(review): the title/category/citation below are intentionally
    # mismatched sample data — this only exercises the plumbing.
    test_data = {
        "Economics": {
            "2501.00578": {
                "paper_id": "2501.00578",
                "doi": "https://doi.org/10.1002/alz.14328",
                "title": "Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
                "category": "Economics",
                "pdf_url": "https://arxiv.org/pdf/2501.00578",
                "citation": "Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
            }
        }
    }
    return paper_data(json.dumps(test_data, ensure_ascii=False, indent=4))


if __name__ == '__main__':
    result = test(ACCESS_KEY)
    print(result)