m-ric (HF staff) committed
Commit e2de670 · verified · 1 Parent(s): 135ada9

Update app.py

Files changed (1):
  1. app.py +40 -45
app.py CHANGED
@@ -28,55 +28,50 @@ app.add_middleware(
 
 @app.get("/api/results")
 async def get_results():
-    try:
-        # Load the dataset
-        dataset = load_dataset("smolagents/results")
-        # Convert to list for processing
-        df = dataset["train"].to_pandas()
-
-        # Log some info to help debug
-        print("Dataset loaded, shape:", df.shape)
-        print("Columns:", df.columns)
+    # Load the dataset
+    dataset = load_dataset("smolagents/results")
+    # Convert to list for processing
+    df = dataset["train"].to_pandas()
+
+    # Log some info to help debug
+    print("Dataset loaded, shape:", df.shape)
+    print("Columns:", df.columns)
 
-        # Process the data to match frontend expectations
-        result = []
-        # Ensure we have the expected columns
-        expected_columns = ['model_id', 'agent_action_type', 'benchmark', 'score']
-        for col in expected_columns:
-            if col not in df.columns:
-                print(f"Warning: Column {col} not found in dataset")
-
-        # Group by model_id and agent_action_type to create the expected structure
-        for (model_id, agent_action_type), group in df.groupby(['model_id', 'agent_action_type']):
-            # Calculate scores for each benchmark
-            benchmark_scores = {}
-            benchmarks = ['GAIA', 'MATH', 'SimpleQA']
-
-            for benchmark in benchmarks:
-                benchmark_group = group[group['benchmark'] == benchmark]
-                if not benchmark_group.empty:
-                    benchmark_scores[benchmark] = benchmark_group['score'].mean() * 100  # Convert to percentage
-
-            # Calculate average if we have at least one benchmark score
-            if benchmark_scores:
-                benchmark_scores['Average'] = sum(benchmark_scores.values()) / len(benchmark_scores)
+    # Process the data to match frontend expectations
+    result = []
+    # Ensure we have the expected columns
+    expected_columns = ['model_id', 'agent_action_type', 'source', 'acc']
+    for col in expected_columns:
+        if col not in df.columns:
+            print(f"Warning: Column {col} not found in dataset")
 
-            # Add entry to result
-            result.append({
-                'model_id': model_id,
-                'agent_action_type': agent_action_type,
-                'scores': benchmark_scores
-            })
+    # Group by model_id and agent_action_type to create the expected structure
+    for (model_id, agent_action_type), group in df.groupby(['model_id', 'agent_action_type']):
+        # Calculate scores for each benchmark
+        benchmark_scores = {}
+        benchmarks = ['GAIA', 'MATH', 'SimpleQA']
+
+        for benchmark in benchmarks:
+            benchmark_group = group[group['source'] == benchmark]
+            if not benchmark_group.empty:
+                benchmark_scores[benchmark] = benchmark_group['acc'].mean() * 100  # Convert to percentage
+
+        # Calculate average if we have at least one benchmark score
+        if benchmark_scores:
+            benchmark_scores['Average'] = sum(benchmark_scores.values()) / len(benchmark_scores)
 
-        print(f"Processed {len(result)} entries for the frontend")
-        # Return the properly formatted data as a JSON response
-        return result
-        return data
+        # Add entry to result
+        result.append({
+            'model_id': model_id,
+            'agent_action_type': agent_action_type,
+            'scores': benchmark_scores
+        })
+
+    print(f"Processed {len(result)} entries for the frontend")
+    # Return the properly formatted data as a JSON response
+    return result
+    return data
 
-    except Exception as e:
-        # Print the full error traceback to your logs
-        print("Error occurred:", str(e))
-        raise HTTPException(status_code=500, detail=str(e))
 
 
 
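For reference, the substance of this commit is (a) dropping the try/except wrapper and (b) renaming the expected dataset columns from benchmark/score to source/acc; the split-apply-combine aggregation itself is unchanged. Below is a minimal, self-contained sketch of that aggregation on a made-up DataFrame (the model names and accuracy values are illustrative, not taken from the real smolagents/results dataset), showing the structure the frontend receives:

import pandas as pd

# Toy stand-in for load_dataset("smolagents/results")["train"].to_pandas();
# the rows below are illustrative only.
df = pd.DataFrame({
    "model_id": ["m1", "m1", "m1", "m2"],
    "agent_action_type": ["code", "code", "code", "code"],
    "source": ["GAIA", "MATH", "GAIA", "MATH"],
    "acc": [0.25, 0.75, 0.75, 1.0],
})

result = []
for (model_id, agent_action_type), group in df.groupby(["model_id", "agent_action_type"]):
    benchmark_scores = {}
    for benchmark in ["GAIA", "MATH", "SimpleQA"]:
        rows = group[group["source"] == benchmark]
        if not rows.empty:
            # Mean per-row accuracy for this benchmark, scaled to a percentage
            benchmark_scores[benchmark] = rows["acc"].mean() * 100
    if benchmark_scores:
        # Unweighted mean over whichever benchmarks were present
        benchmark_scores["Average"] = sum(benchmark_scores.values()) / len(benchmark_scores)
    result.append({
        "model_id": model_id,
        "agent_action_type": agent_action_type,
        "scores": benchmark_scores,
    })

print(result)
# [{'model_id': 'm1', 'agent_action_type': 'code',
#   'scores': {'GAIA': 50.0, 'MATH': 75.0, 'Average': 62.5}},
#  {'model_id': 'm2', 'agent_action_type': 'code',
#   'scores': {'MATH': 100.0, 'Average': 100.0}}]

Each (model_id, agent_action_type) group yields one entry; benchmarks with no rows in a group are simply omitted from its scores dict, so 'Average' is the unweighted mean of only the benchmarks that are present.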
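Two caveats against the new version: with the try/except removed, a failure in load_dataset now surfaces as an unhandled 500 rather than the explicit HTTPException, and the trailing return data after return result remains unreachable dead code (data is never defined). A quick smoke test of the endpoint's response shape, assuming the FastAPI instance is importable as app from app.py (an import path guessed from the filename, not confirmed by the diff):

from fastapi.testclient import TestClient

from app import app  # assumed: app.py exposes the FastAPI instance as `app`

client = TestClient(app)

def test_results_shape():
    response = client.get("/api/results")
    assert response.status_code == 200
    entries = response.json()
    assert isinstance(entries, list)
    for entry in entries:
        # Keys the frontend relies on, per the handler above
        assert {"model_id", "agent_action_type", "scores"} <= entry.keys()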