m-ric (HF staff) committed
Commit 135ada9 · verified · 1 Parent(s): a89f886

Update app.py

Files changed (1):
1. app.py (+36 -9)
app.py CHANGED
@@ -31,19 +31,46 @@ async def get_results():
     try:
         # Load the dataset
         dataset = load_dataset("smolagents/results")
-
         # Convert to list for processing
-        data = dataset["train"].to_pandas()
+        df = dataset["train"].to_pandas()
 
         # Log some info to help debug
-        print("Dataset loaded, shape:", data.shape)
-        print("Columns:", data.columns)
-        print("First row:", data.iloc[0])
-
-        # Process the data to group by model and calculate scores
-        processed_data = []
-        grouped = data.groupby('model_id')
+        print("Dataset loaded, shape:", df.shape)
+        print("Columns:", df.columns)
+
+        # Process the data to match frontend expectations
+        result = []
+        # Ensure we have the expected columns
+        expected_columns = ['model_id', 'agent_action_type', 'benchmark', 'score']
+        for col in expected_columns:
+            if col not in df.columns:
+                print(f"Warning: Column {col} not found in dataset")
+
+        # Group by model_id and agent_action_type to create the expected structure
+        for (model_id, agent_action_type), group in df.groupby(['model_id', 'agent_action_type']):
+            # Calculate scores for each benchmark
+            benchmark_scores = {}
+            benchmarks = ['GAIA', 'MATH', 'SimpleQA']
+
+            for benchmark in benchmarks:
+                benchmark_group = group[group['benchmark'] == benchmark]
+                if not benchmark_group.empty:
+                    benchmark_scores[benchmark] = benchmark_group['score'].mean() * 100  # Convert to percentage
+
+            # Calculate average if we have at least one benchmark score
+            if benchmark_scores:
+                benchmark_scores['Average'] = sum(benchmark_scores.values()) / len(benchmark_scores)
+
+            # Add entry to result
+            result.append({
+                'model_id': model_id,
+                'agent_action_type': agent_action_type,
+                'scores': benchmark_scores
+            })
 
+        print(f"Processed {len(result)} entries for the frontend")
+        # Return the properly formatted data as a JSON response
+        return result
         return data
 
     except Exception as e:
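
For reference, a minimal sketch of the shape each entry in result takes after this change, assuming the dataset's score column is a fraction between 0 and 1. The field names come from the diff above; the model id and the numbers are illustrative placeholders, not values from the dataset:

# Hypothetical example of one entry returned by get_results() after this commit.
# The model id and scores below are placeholders for illustration only.
example_entry = {
    "model_id": "example-org/example-model",   # placeholder, not a real model
    "agent_action_type": "code",               # one value of the agent_action_type column
    "scores": {
        "GAIA": 30.0,       # mean GAIA score * 100
        "MATH": 75.0,       # mean MATH score * 100
        "SimpleQA": 60.0,   # mean SimpleQA score * 100
        "Average": 55.0,    # mean of the three benchmark scores above
    },
}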