"""Web front end for the "This Week in Rheumatology" newsletter.

Lists and renders newsletters stored in a Hugging Face dataset repository,
offers PDF/EPUB downloads, and schedules the weekly generation job.
"""

# NOTE: import order is kept as in the original file; the star import from
# fasthtml.common may rebind names, so reordering could change what they
# resolve to.
import json
import os
from datetime import datetime, timedelta, timezone
from fasthtml.common import *
from huggingface_hub import HfApi, hf_hub_download
from starlette.responses import FileResponse
from generate_newsletter import process_new_papers
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

# Initialize Hugging Face API
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
api = HfApi(token=HF_TOKEN)

# Downloadable formats mapped to the media type sent with the file.
# Insertion order is also the order of the download links on the page.
_MEDIA_TYPES = {
    "pdf": "application/pdf",
    "epub": "application/epub+zip",
}

# Initialize scheduler
scheduler = BackgroundScheduler()


def _run_weekly_newsletter():
    """Generate the newsletter for the week ending on the previous Sunday.

    The end date is computed when the job fires. The original code passed an
    unrendered Airflow-style template string as ``end_date``; APScheduler does
    not render templates, so the literal text reached ``process_new_papers``.
    """
    now = datetime.now(timezone.utc)
    # Monday is weekday 0, so this steps back to the Sunday before the
    # current week (yesterday when the job runs on a Monday).
    end_date = now - timedelta(days=now.weekday() + 1)
    process_new_papers(end_date=end_date.strftime("%Y-%m-%d"), test=False)


# Schedule newsletter generation to run every Monday at 1 AM UTC
scheduler.add_job(
    _run_weekly_newsletter,
    # The timezone is given by name, which APScheduler accepts.
    trigger=CronTrigger(day_of_week="mon", hour=1, minute=0, timezone="UTC"),
    id="generate_newsletter",
    name="Weekly newsletter generation",
    replace_existing=True,
)

css = Style("""
    body {
        font-family: Georgia, Times, serif;
        line-height: 1.6;
        color: #333;
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        background: #fff;
    }
    h1, h2 {
        color: #2c3e50;
        font-family: Georgia, Times, serif;
    }
    a {
        color: #2c3e50;
        text-decoration: none;
    }
    a:hover {
        text-decoration: underline;
    }
    ul {
        list-style-type: none;
        padding: 0;
    }
    li {
        margin-bottom: 10px;
    }
    .newsletter-content {
        margin-top: 20px;
    }
    .download-links {
        margin: 20px 0;
    }
    .download-link {
        display: inline-block;
        padding: 10px 20px;
        background-color: #2c3e50;
        color: white;
        border-radius: 3px;
        margin: 0 10px 10px 0;
        font-family: Georgia, Times, serif;
    }
    .download-link:hover {
        background-color: #34495e;
        text-decoration: none;
    }
""")

app = FastHTML(hdrs=(css, MarkdownJS(),
                     HighlightJS(
                         langs=['python', 'javascript', 'html', 'css'])))


# Start the scheduler when the app starts
@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler on application startup."""
    scheduler.start()


# Shut down the scheduler when the app stops
@app.on_event("shutdown")
async def shutdown_scheduler():
    """Stop the background scheduler on application shutdown."""
    scheduler.shutdown()


def _is_valid_date(date):
    """Return True if *date* is an 8-digit ``YYYYMMDD`` calendar date.

    Route parameters are untrusted and are interpolated into repository
    paths, so anything that is not a plain date is rejected up front.
    """
    if len(date) != 8 or not (date.isascii() and date.isdigit()):
        return False
    try:
        datetime.strptime(date, "%Y%m%d")
    except ValueError:
        return False
    return True


def _error_page(heading, message):
    """Build the shared error page with a link back to the index."""
    return Titled("Error", H2(heading), P(message),
                  A("Back to Index", href="/"))


def get_newsletter_list():
    """Return the repository paths of all newsletters, newest first.

    Lists the files of the dataset repository and keeps those whose path ends
    with ``newsletter.json``, sorted in descending lexicographic order.
    """
    files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
    newsletters = [f for f in files if f.endswith('newsletter.json')]
    return sorted(newsletters, reverse=True)


def get_newsletter_content(path):
    """Download the newsletter JSON at *path* and return the parsed object.

    Exceptions from the download, from opening the file, or from JSON parsing
    propagate to the caller.
    """
    local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                     filename=path,
                                     repo_type="dataset")
    # JSON is UTF-8 by specification; don't depend on the platform default.
    with open(local_path, 'r', encoding="utf-8") as f:
        return json.load(f)


def check_format_exists(date: str, format: str) -> bool:
    """Check if a specific format exists for a given date.

    Tries to download ``{date}/newsletter.{format}``; any failure is treated
    as "not available" (deliberately best-effort).
    """
    try:
        api.hf_hub_download(
            repo_id=DATASET_NAME,
            filename=f"{date}/newsletter.{format}",
            repo_type="dataset"
        )
        return True
    except Exception:
        return False


@app.get("/")
def index():
    """Render the index page linking to every available newsletter."""
    links = []
    for path in get_newsletter_list():
        folder = path.split('/')[0]
        try:
            label = datetime.strptime(folder, '%Y%m%d').strftime('%B %d, %Y')
        except ValueError:
            # Not a YYYYMMDD folder (e.g. a file at the repo root); skip it
            # rather than failing the whole index page.
            continue
        links.append(Li(A(label, href=f"/newsletter/{folder}")))
    return Titled("This Week in Rheumatology",
                  H2("Available Newsletters"),
                  Ul(*links))


@app.get("/newsletter/{date}")
def newsletter(date: str):
    """Render the newsletter for *date* (``YYYYMMDD``), or an error page."""
    heading = "Newsletter not found"
    message = f"Unable to load newsletter for date: {date}"

    if not _is_valid_date(date):
        return _error_page(heading, message)

    try:
        content = get_newsletter_content(f"{date}/newsletter.json")
        title = f"This Week in Rheumatology - {content['date']}"
        body = content['content']
    except Exception:
        # Request boundary: a missing file, a hub error or malformed JSON all
        # lead to the same user-facing error page.
        return _error_page(heading, message)

    # Offer a download link for each additional format that exists.
    download_links = [
        A(f"Download {fmt.upper()}",
          href=f"/download/{date}/{fmt}",
          cls="download-link")
        for fmt in _MEDIA_TYPES
        if check_format_exists(date, fmt)
    ]

    return Titled(
        title,
        A("Back to Index", href="/"),
        Div(*download_links, cls="download-links"),
        Div(body, cls="marked"))


@app.get("/download/{date}/{format}")
def download_file(date: str, format: str):
    """Send the newsletter for *date* in *format* (``pdf`` or ``epub``).

    Returns the error page for unsupported formats, malformed dates, and
    files that cannot be fetched from the repository.
    """
    heading = f"{format.upper()} not found"
    message = f"Unable to load {format.upper()} for date: {date}"

    # Validate the untrusted path parameters before touching the hub; the
    # original downloaded first and only then rejected unknown formats.
    media_type = _MEDIA_TYPES.get(format)
    if media_type is None or not _is_valid_date(date):
        return _error_page(heading, message)

    try:
        local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                         filename=f"{date}/newsletter.{format}",
                                         repo_type="dataset")
    except Exception:
        # Request boundary: any download failure becomes the error page.
        return _error_page(heading, message)

    return FileResponse(local_path,
                        media_type=media_type,
                        filename=f"newsletter_{date}.{format}")


# Launch the app through FastHTML's serve().
serve()