StringChaos committed on
Commit
6fe41a3
·
1 Parent(s): b3bc767

explorer updated evals

Browse files
all_outputs.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:395624aed41b3527d333818c1ee9f7d537b937e7b0eaa0d3cf05fa0743a2fbbd
3
- size 2528262
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8fa8293294ed03c607e4c0d861b50203bd6aee395414d8f5d9d7e9a7853acc9
3
+ size 284487753
app.py CHANGED
@@ -56,6 +56,7 @@ all_evaluations_by_problem_colored = [
56
  for model in all_models
57
  },
58
  problems[idx]["difficulty"],
 
59
  )
60
  for trueidx, idx in enumerate(random_idxs)
61
  ]
@@ -110,6 +111,7 @@ def problem(problem_idx):
110
  mini_models = [
111
  # "DeepSeek-V2",
112
  "DeepSeek-V3",
 
113
  # "DSCoder-33b-Ins",
114
  # "GPT-4-Turbo-2024-04-09",
115
  "GPT-4O-2024-05-13",
@@ -117,8 +119,8 @@ mini_models = [
117
  "Gemini-Flash-2.0-Thinking",
118
  # "Gemini-Exp-1206",
119
  # "Claude-3-Sonnet",
120
- "O1-Mini-2024-09-12 (N=1)",
121
  "O1-2024-12-17 (N=1) (High)",
 
122
  ]
123
 
124
 
 
56
  for model in all_models
57
  },
58
  problems[idx]["difficulty"],
59
+ problems[idx]["question_id"],
60
  )
61
  for trueidx, idx in enumerate(random_idxs)
62
  ]
 
111
  mini_models = [
112
  # "DeepSeek-V2",
113
  "DeepSeek-V3",
114
+ "DeepSeek-R1-Preview",
115
  # "DSCoder-33b-Ins",
116
  # "GPT-4-Turbo-2024-04-09",
117
  "GPT-4O-2024-05-13",
 
119
  "Gemini-Flash-2.0-Thinking",
120
  # "Gemini-Exp-1206",
121
  # "Claude-3-Sonnet",
 
122
  "O1-2024-12-17 (N=1) (High)",
123
+ "QwQ-32B-Preview (N=1)",
124
  ]
125
 
126
 
templates/index.html CHANGED
@@ -86,7 +86,7 @@
86
  <table id='model-table' style="align:center">
87
  <thead>
88
  <tr>
89
- <th>Problem ID</th>
90
  <th>Difficulty</th>
91
  {% for model in models %}
92
  <th class="column-{{ model }}">{{ model }}</th>
@@ -94,9 +94,9 @@
94
  </tr>
95
  </thead>
96
  <tbody>
97
- {% for problem_idx, problem, difficulty in problems %}
98
  <tr>
99
- <td> <a href="{{ url_for('problem', problem_idx=problem_idx) }}"> {{ problem_idx }} </a> </td>
100
  <td> {{ difficulty }} </td>
101
  {% for model in models %}
102
  <td style="background-color: {{ problem[model]['correctness_color'] }};" class="column-{{ model }}">
 
86
  <table id='model-table' style="align:center">
87
  <thead>
88
  <tr>
89
+ <th>Question ID</th>
90
  <th>Difficulty</th>
91
  {% for model in models %}
92
  <th class="column-{{ model }}">{{ model }}</th>
 
94
  </tr>
95
  </thead>
96
  <tbody>
97
+ {% for problem_idx, problem, difficulty, question_id in problems %}
98
  <tr>
99
+ <td> <a href="{{ url_for('problem', problem_idx=problem_idx) }}"> {{ question_id }} </a> </td>
100
  <td> {{ difficulty }} </td>
101
  {% for model in models %}
102
  <td style="background-color: {{ problem[model]['correctness_color'] }};" class="column-{{ model }}">
templates/index_mini.html CHANGED
@@ -85,7 +85,7 @@
85
  <table id='model-table' style="align:center">
86
  <thead>
87
  <tr>
88
- <th>Problem ID</th>
89
  <th>Difficulty</th>
90
  {% for model in models %}
91
  <th class="column-{{ model }}">{{ model }}</th>
@@ -93,9 +93,9 @@
93
  </tr>
94
  </thead>
95
  <tbody>
96
- {% for problem_idx, problem, difficulty in problems %}
97
  <tr>
98
- <td> <a href="{{ url_for('problem_mini', problem_idx=problem_idx) }}"> {{ problem_idx }} </a> </td>
99
  <td> {{ difficulty }} </td>
100
  {% for model in models %}
101
  <td style="background-color: {{ problem[model]['correctness_color'] }};" class="column-{{ model }}">
 
85
  <table id='model-table' style="align:center">
86
  <thead>
87
  <tr>
88
+ <th>Question ID</th>
89
  <th>Difficulty</th>
90
  {% for model in models %}
91
  <th class="column-{{ model }}">{{ model }}</th>
 
93
  </tr>
94
  </thead>
95
  <tbody>
96
+ {% for problem_idx, problem, difficulty, question_id in problems %}
97
  <tr>
98
+ <td> <a href="{{ url_for('problem_mini', problem_idx=problem_idx) }}"> {{ question_id }} </a> </td>
99
  <td> {{ difficulty }} </td>
100
  {% for model in models %}
101
  <td style="background-color: {{ problem[model]['correctness_color'] }};" class="column-{{ model }}">