"""FastAPI server exposing aggregated benchmark results plus the static frontend.

The ``/api/results`` endpoint loads the ``smolagents/results`` dataset, averages
accuracy per (model, agent action type, benchmark) and returns the structure the
frontend expects. Everything under ``static/`` is served at ``/``.
"""

import argparse
import math
import os

import numpy as np
from datasets import load_dataset
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles

HOST = os.environ.get("API_URL", "0.0.0.0")
# May be a string when taken from the environment; argparse runs string
# defaults through ``type=int`` below, so ``args.port`` is always an int.
PORT = os.environ.get("PORT", 7860)

RESULTS_DATASET = "smolagents/results"
EXPECTED_COLUMNS = ("model_id", "agent_action_type", "source", "acc")
BENCHMARKS = ("GAIA", "MATH", "SimpleQA")

parser = argparse.ArgumentParser()
parser.add_argument("--host", default=HOST)
parser.add_argument("--port", type=int, default=PORT)
parser.add_argument("--reload", action="store_true", default=True)
# ``--reload`` defaults to True, so without this counterpart there is no way
# to switch auto-reload off from the command line.
parser.add_argument("--no-reload", dest="reload", action="store_false")
parser.add_argument("--ssl_certfile")
parser.add_argument("--ssl_keyfile")
# Tolerant parse at import time: when this module is imported by another
# launcher (e.g. ``uvicorn app:app`` or a test runner), ``sys.argv`` holds that
# launcher's arguments and a strict parse would exit the process.
args, _unknown_args = parser.parse_known_args()

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; restrict ``allow_origins`` before serving credentialed endpoints.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


def _load_results_frame():
    """Load the results dataset and return its ``train`` split as a DataFrame."""
    dataset = load_dataset(RESULTS_DATASET)
    return dataset["train"].to_pandas()


def _summarize_results(df):
    """Build one entry per (model_id, agent_action_type) with benchmark scores.

    Each entry's ``scores`` maps a benchmark name to its mean accuracy as a
    percentage, plus ``Average`` (mean of the available benchmark scores) when
    at least one benchmark has data.
    """
    entries = []
    for (model_id, agent_action_type), group in df.groupby(
        ["model_id", "agent_action_type"]
    ):
        benchmark_scores = {}
        for benchmark in BENCHMARKS:
            accuracies = group.loc[group["source"] == benchmark, "acc"]
            if accuracies.empty:
                continue
            mean_accuracy = float(accuracies.mean())
            # An all-NaN group yields NaN, which cannot be serialised to JSON.
            if math.isnan(mean_accuracy):
                continue
            benchmark_scores[benchmark] = mean_accuracy * 100  # to percentage

        if benchmark_scores:
            benchmark_scores["Average"] = sum(benchmark_scores.values()) / len(
                benchmark_scores
            )

        entries.append(
            {
                "model_id": model_id,
                "agent_action_type": agent_action_type,
                "scores": benchmark_scores,
            }
        )
    return entries


@app.get("/api/results")
async def get_results():
    """Return aggregated benchmark scores for the frontend.

    Returns:
        A list of ``{"model_id", "agent_action_type", "scores"}`` dicts, or a
        JSON 500 response describing the problem when the dataset lacks one of
        the columns the aggregation needs.
    """
    # ``load_dataset`` is blocking; run it in the threadpool so the event loop
    # keeps serving other requests meanwhile.
    df = await run_in_threadpool(_load_results_frame)

    # Log some info to help debug
    print("Dataset loaded, shape:", df.shape)
    print("Columns:", df.columns)

    missing_columns = [col for col in EXPECTED_COLUMNS if col not in df.columns]
    if missing_columns:
        for col in missing_columns:
            print(f"Warning: Column {col} not found in dataset")
        # Every expected column is required below; fail with a clear message
        # instead of an opaque KeyError from the aggregation.
        return JSONResponse(
            status_code=500,
            content={
                "error": "Dataset is missing required columns: "
                + ", ".join(missing_columns)
            },
        )

    result = _summarize_results(df)
    print(f"Processed {len(result)} entries for the frontend")
    return result


# Mounted last so the API route above takes precedence over the catch-all.
app.mount("/", StaticFiles(directory="static", html=True), name="static")

if __name__ == "__main__":
    import uvicorn

    # Strict parse when run as a script so CLI typos are reported.
    args = parser.parse_args()
    print(args)
    uvicorn.run(
        "app:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
        ssl_certfile=args.ssl_certfile,
        ssl_keyfile=args.ssl_keyfile,
    )