yuchenlin committed on
Commit
8f3bf59
1 Parent(s): 4d69201

update results

Browse files
ZeroEval-main/result_dirs/zebra-grid.summary.json CHANGED
@@ -285,6 +285,17 @@
285
  "Total Puzzles": 1000,
286
  "Reason Lens": "1216.40"
287
  },
 
 
 
 
 
 
 
 
 
 
 
288
  {
289
  "Model": "Meta-Llama-3-8B-Instruct",
290
  "Mode": "sampling",
@@ -318,6 +329,17 @@
318
  "Total Puzzles": 1000,
319
  "Reason Lens": "1074.80"
320
  },
 
 
 
 
 
 
 
 
 
 
 
321
  {
322
  "Model": "Qwen2-7B-Instruct",
323
  "Mode": "greedy",
@@ -328,5 +350,16 @@
328
  "Hard Puzzle Acc": "0.28",
329
  "Total Puzzles": 1000,
330
  "Reason Lens": "1473.23"
 
 
 
 
 
 
 
 
 
 
 
331
  }
332
  ]
 
285
  "Total Puzzles": 1000,
286
  "Reason Lens": "1216.40"
287
  },
288
+ {
289
+ "Model": "Yi-1.5-34B-Chat",
290
+ "Mode": "greedy",
291
+ "Puzzle Acc": "11.50",
292
+ "Cell Acc": "32.73",
293
+ "No answer": "4.40",
294
+ "Easy Puzzle Acc": "37.50",
295
+ "Hard Puzzle Acc": "1.39",
296
+ "Total Puzzles": 1000,
297
+ "Reason Lens": "869.65"
298
+ },
299
  {
300
  "Model": "Meta-Llama-3-8B-Instruct",
301
  "Mode": "sampling",
 
329
  "Total Puzzles": 1000,
330
  "Reason Lens": "1074.80"
331
  },
332
+ {
333
+ "Model": "mathstral-7B-v0.1",
334
+ "Mode": "greedy",
335
+ "Puzzle Acc": "9.00",
336
+ "Cell Acc": "20.42",
337
+ "No answer": "36.00",
338
+ "Easy Puzzle Acc": "30.00",
339
+ "Hard Puzzle Acc": "0.83",
340
+ "Total Puzzles": 1000,
341
+ "Reason Lens": "1148.16"
342
+ },
343
  {
344
  "Model": "Qwen2-7B-Instruct",
345
  "Mode": "greedy",
 
350
  "Hard Puzzle Acc": "0.28",
351
  "Total Puzzles": 1000,
352
  "Reason Lens": "1473.23"
353
+ },
354
+ {
355
+ "Model": "Yi-1.5-9B-Chat",
356
+ "Mode": "greedy",
357
+ "Puzzle Acc": "2.30",
358
+ "Cell Acc": "7.53",
359
+ "No answer": "11.30",
360
+ "Easy Puzzle Acc": "8.21",
361
+ "Hard Puzzle Acc": "0.00",
362
+ "Total Puzzles": 1000,
363
+ "Reason Lens": "1592.60"
364
  }
365
  ]