Sharathhebbar24 committed
Commit 36634ac (verified) · 1 Parent(s): 2c35dac

Update README.md

Files changed (1)
  1. README.md +386 -0
README.md CHANGED
@@ -48,4 +48,390 @@ prompt.
48
  >>> """
49
  >>> res = generate_text(prompt)
50
  >>> res
51
+ ```
52
+
53
+ ## Leaderboard
54
+
55
+ ```python
56
+ {
57
+ "all": {
58
+ "acc": 0.24915779048270345,
59
+ "acc_stderr": 0.030509906389610868,
60
+ "acc_norm": 0.25041231816215265,
61
+ "acc_norm_stderr": 0.03132600249114931,
62
+ "mc1": 0.2521419828641371,
63
+ "mc1_stderr": 0.015201522246299965,
64
+ "mc2": 0.41257163824244014,
65
+ "mc2_stderr": 0.015127188811834062
66
+ },
67
+ "harness|arc:challenge|25": {
68
+ "acc": 0.18686006825938567,
69
+ "acc_stderr": 0.011391015649694391,
70
+ "acc_norm": 0.23976109215017063,
71
+ "acc_norm_stderr": 0.012476304127453954
72
+ },
73
+ "harness|hellaswag|10": {
74
+ "acc": 0.28978291177056364,
75
+ "acc_stderr": 0.004527343651130803,
76
+ "acc_norm": 0.3121888070105557,
77
+ "acc_norm_stderr": 0.0046243936909668975
78
+ },
79
+ "harness|hendrycksTest-abstract_algebra|5": {
80
+ "acc": 0.22,
81
+ "acc_stderr": 0.04163331998932268,
82
+ "acc_norm": 0.22,
83
+ "acc_norm_stderr": 0.04163331998932268
84
+ },
85
+ "harness|hendrycksTest-anatomy|5": {
86
+ "acc": 0.3037037037037037,
87
+ "acc_stderr": 0.039725528847851375,
88
+ "acc_norm": 0.3037037037037037,
89
+ "acc_norm_stderr": 0.039725528847851375
90
+ },
91
+ "harness|hendrycksTest-astronomy|5": {
92
+ "acc": 0.17763157894736842,
93
+ "acc_stderr": 0.031103182383123398,
94
+ "acc_norm": 0.17763157894736842,
95
+ "acc_norm_stderr": 0.031103182383123398
96
+ },
97
+ "harness|hendrycksTest-business_ethics|5": {
98
+ "acc": 0.26,
99
+ "acc_stderr": 0.0440844002276808,
100
+ "acc_norm": 0.26,
101
+ "acc_norm_stderr": 0.0440844002276808
102
+ },
103
+ "harness|hendrycksTest-clinical_knowledge|5": {
104
+ "acc": 0.23018867924528302,
105
+ "acc_stderr": 0.025907897122408173,
106
+ "acc_norm": 0.23018867924528302,
107
+ "acc_norm_stderr": 0.025907897122408173
108
+ },
109
+ "harness|hendrycksTest-college_biology|5": {
110
+ "acc": 0.2569444444444444,
111
+ "acc_stderr": 0.03653946969442099,
112
+ "acc_norm": 0.2569444444444444,
113
+ "acc_norm_stderr": 0.03653946969442099
114
+ },
115
+ "harness|hendrycksTest-college_chemistry|5": {
116
+ "acc": 0.19,
117
+ "acc_stderr": 0.039427724440366234,
118
+ "acc_norm": 0.19,
119
+ "acc_norm_stderr": 0.039427724440366234
120
+ },
121
+ "harness|hendrycksTest-college_computer_science|5": {
122
+ "acc": 0.24,
123
+ "acc_stderr": 0.04292346959909283,
124
+ "acc_norm": 0.24,
125
+ "acc_norm_stderr": 0.04292346959909283
126
+ },
127
+ "harness|hendrycksTest-college_mathematics|5": {
128
+ "acc": 0.29,
129
+ "acc_stderr": 0.04560480215720684,
130
+ "acc_norm": 0.29,
131
+ "acc_norm_stderr": 0.04560480215720684
132
+ },
133
+ "harness|hendrycksTest-college_medicine|5": {
134
+ "acc": 0.2543352601156069,
135
+ "acc_stderr": 0.0332055644308557,
136
+ "acc_norm": 0.2543352601156069,
137
+ "acc_norm_stderr": 0.0332055644308557
138
+ },
139
+ "harness|hendrycksTest-college_physics|5": {
140
+ "acc": 0.21568627450980393,
141
+ "acc_stderr": 0.04092563958237654,
142
+ "acc_norm": 0.21568627450980393,
143
+ "acc_norm_stderr": 0.04092563958237654
144
+ },
145
+ "harness|hendrycksTest-computer_security|5": {
146
+ "acc": 0.34,
147
+ "acc_stderr": 0.04760952285695236,
148
+ "acc_norm": 0.34,
149
+ "acc_norm_stderr": 0.04760952285695236
150
+ },
151
+ "harness|hendrycksTest-conceptual_physics|5": {
152
+ "acc": 0.26382978723404255,
153
+ "acc_stderr": 0.028809989854102973,
154
+ "acc_norm": 0.26382978723404255,
155
+ "acc_norm_stderr": 0.028809989854102973
156
+ },
157
+ "harness|hendrycksTest-econometrics|5": {
158
+ "acc": 0.24561403508771928,
159
+ "acc_stderr": 0.04049339297748142,
160
+ "acc_norm": 0.24561403508771928,
161
+ "acc_norm_stderr": 0.04049339297748142
162
+ },
163
+ "harness|hendrycksTest-electrical_engineering|5": {
164
+ "acc": 0.2413793103448276,
165
+ "acc_stderr": 0.03565998174135302,
166
+ "acc_norm": 0.2413793103448276,
167
+ "acc_norm_stderr": 0.03565998174135302
168
+ },
169
+ "harness|hendrycksTest-elementary_mathematics|5": {
170
+ "acc": 0.24074074074074073,
171
+ "acc_stderr": 0.02201908001221789,
172
+ "acc_norm": 0.24074074074074073,
173
+ "acc_norm_stderr": 0.02201908001221789
174
+ },
175
+ "harness|hendrycksTest-formal_logic|5": {
176
+ "acc": 0.1349206349206349,
177
+ "acc_stderr": 0.030557101589417515,
178
+ "acc_norm": 0.1349206349206349,
179
+ "acc_norm_stderr": 0.030557101589417515
180
+ },
181
+ "harness|hendrycksTest-global_facts|5": {
182
+ "acc": 0.16,
183
+ "acc_stderr": 0.03684529491774708,
184
+ "acc_norm": 0.16,
185
+ "acc_norm_stderr": 0.03684529491774708
186
+ },
187
+ "harness|hendrycksTest-high_school_biology|5": {
188
+ "acc": 0.1774193548387097,
189
+ "acc_stderr": 0.02173254068932927,
190
+ "acc_norm": 0.1774193548387097,
191
+ "acc_norm_stderr": 0.02173254068932927
192
+ },
193
+ "harness|hendrycksTest-high_school_chemistry|5": {
194
+ "acc": 0.24630541871921183,
195
+ "acc_stderr": 0.030315099285617736,
196
+ "acc_norm": 0.24630541871921183,
197
+ "acc_norm_stderr": 0.030315099285617736
198
+ },
199
+ "harness|hendrycksTest-high_school_computer_science|5": {
200
+ "acc": 0.28,
201
+ "acc_stderr": 0.04512608598542126,
202
+ "acc_norm": 0.28,
203
+ "acc_norm_stderr": 0.04512608598542126
204
+ },
205
+ "harness|hendrycksTest-high_school_european_history|5": {
206
+ "acc": 0.21818181818181817,
207
+ "acc_stderr": 0.03225078108306289,
208
+ "acc_norm": 0.21818181818181817,
209
+ "acc_norm_stderr": 0.03225078108306289
210
+ },
211
+ "harness|hendrycksTest-high_school_geography|5": {
212
+ "acc": 0.3282828282828283,
213
+ "acc_stderr": 0.03345678422756776,
214
+ "acc_norm": 0.3282828282828283,
215
+ "acc_norm_stderr": 0.03345678422756776
216
+ },
217
+ "harness|hendrycksTest-high_school_government_and_politics|5": {
218
+ "acc": 0.37305699481865284,
219
+ "acc_stderr": 0.03490205592048573,
220
+ "acc_norm": 0.37305699481865284,
221
+ "acc_norm_stderr": 0.03490205592048573
222
+ },
223
+ "harness|hendrycksTest-high_school_macroeconomics|5": {
224
+ "acc": 0.26666666666666666,
225
+ "acc_stderr": 0.02242127361292371,
226
+ "acc_norm": 0.26666666666666666,
227
+ "acc_norm_stderr": 0.02242127361292371
228
+ },
229
+ "harness|hendrycksTest-high_school_mathematics|5": {
230
+ "acc": 0.21481481481481482,
231
+ "acc_stderr": 0.025040443877000683,
232
+ "acc_norm": 0.21481481481481482,
233
+ "acc_norm_stderr": 0.025040443877000683
234
+ },
235
+ "harness|hendrycksTest-high_school_microeconomics|5": {
236
+ "acc": 0.22268907563025211,
237
+ "acc_stderr": 0.027025433498882364,
238
+ "acc_norm": 0.22268907563025211,
239
+ "acc_norm_stderr": 0.027025433498882364
240
+ },
241
+ "harness|hendrycksTest-high_school_physics|5": {
242
+ "acc": 0.23178807947019867,
243
+ "acc_stderr": 0.034454062719870546,
244
+ "acc_norm": 0.23178807947019867,
245
+ "acc_norm_stderr": 0.034454062719870546
246
+ },
247
+ "harness|hendrycksTest-high_school_psychology|5": {
248
+ "acc": 0.3302752293577982,
249
+ "acc_stderr": 0.02016446633634298,
250
+ "acc_norm": 0.3302752293577982,
251
+ "acc_norm_stderr": 0.02016446633634298
252
+ },
253
+ "harness|hendrycksTest-high_school_statistics|5": {
254
+ "acc": 0.19444444444444445,
255
+ "acc_stderr": 0.026991454502036733,
256
+ "acc_norm": 0.19444444444444445,
257
+ "acc_norm_stderr": 0.026991454502036733
258
+ },
259
+ "harness|hendrycksTest-high_school_us_history|5": {
260
+ "acc": 0.25,
261
+ "acc_stderr": 0.03039153369274154,
262
+ "acc_norm": 0.25,
263
+ "acc_norm_stderr": 0.03039153369274154
264
+ },
265
+ "harness|hendrycksTest-high_school_world_history|5": {
266
+ "acc": 0.26582278481012656,
267
+ "acc_stderr": 0.028756799629658342,
268
+ "acc_norm": 0.26582278481012656,
269
+ "acc_norm_stderr": 0.028756799629658342
270
+ },
271
+ "harness|hendrycksTest-human_aging|5": {
272
+ "acc": 0.17937219730941703,
273
+ "acc_stderr": 0.0257498195691928,
274
+ "acc_norm": 0.17937219730941703,
275
+ "acc_norm_stderr": 0.0257498195691928
276
+ },
277
+ "harness|hendrycksTest-human_sexuality|5": {
278
+ "acc": 0.2366412213740458,
279
+ "acc_stderr": 0.037276735755969174,
280
+ "acc_norm": 0.2366412213740458,
281
+ "acc_norm_stderr": 0.037276735755969174
282
+ },
283
+ "harness|hendrycksTest-international_law|5": {
284
+ "acc": 0.35537190082644626,
285
+ "acc_stderr": 0.04369236326573981,
286
+ "acc_norm": 0.35537190082644626,
287
+ "acc_norm_stderr": 0.04369236326573981
288
+ },
289
+ "harness|hendrycksTest-jurisprudence|5": {
290
+ "acc": 0.25925925925925924,
291
+ "acc_stderr": 0.042365112580946336,
292
+ "acc_norm": 0.25925925925925924,
293
+ "acc_norm_stderr": 0.042365112580946336
294
+ },
295
+ "harness|hendrycksTest-logical_fallacies|5": {
296
+ "acc": 0.2822085889570552,
297
+ "acc_stderr": 0.03536117886664742,
298
+ "acc_norm": 0.2822085889570552,
299
+ "acc_norm_stderr": 0.03536117886664742
300
+ },
301
+ "harness|hendrycksTest-machine_learning|5": {
302
+ "acc": 0.32142857142857145,
303
+ "acc_stderr": 0.04432804055291519,
304
+ "acc_norm": 0.32142857142857145,
305
+ "acc_norm_stderr": 0.04432804055291519
306
+ },
307
+ "harness|hendrycksTest-management|5": {
308
+ "acc": 0.1941747572815534,
309
+ "acc_stderr": 0.03916667762822585,
310
+ "acc_norm": 0.1941747572815534,
311
+ "acc_norm_stderr": 0.03916667762822585
312
+ },
313
+ "harness|hendrycksTest-marketing|5": {
314
+ "acc": 0.2905982905982906,
315
+ "acc_stderr": 0.02974504857267404,
316
+ "acc_norm": 0.2905982905982906,
317
+ "acc_norm_stderr": 0.02974504857267404
318
+ },
319
+ "harness|hendrycksTest-medical_genetics|5": {
320
+ "acc": 0.25,
321
+ "acc_stderr": 0.04351941398892446,
322
+ "acc_norm": 0.25,
323
+ "acc_norm_stderr": 0.04351941398892446
324
+ },
325
+ "harness|hendrycksTest-miscellaneous|5": {
326
+ "acc": 0.23627075351213284,
327
+ "acc_stderr": 0.015190473717037497,
328
+ "acc_norm": 0.23627075351213284,
329
+ "acc_norm_stderr": 0.015190473717037497
330
+ },
331
+ "harness|hendrycksTest-moral_disputes|5": {
332
+ "acc": 0.24566473988439305,
333
+ "acc_stderr": 0.02317629820399201,
334
+ "acc_norm": 0.24566473988439305,
335
+ "acc_norm_stderr": 0.02317629820399201
336
+ },
337
+ "harness|hendrycksTest-moral_scenarios|5": {
338
+ "acc": 0.24581005586592178,
339
+ "acc_stderr": 0.014400296429225587,
340
+ "acc_norm": 0.24581005586592178,
341
+ "acc_norm_stderr": 0.014400296429225587
342
+ },
343
+ "harness|hendrycksTest-nutrition|5": {
344
+ "acc": 0.25163398692810457,
345
+ "acc_stderr": 0.024848018263875195,
346
+ "acc_norm": 0.25163398692810457,
347
+ "acc_norm_stderr": 0.024848018263875195
348
+ },
349
+ "harness|hendrycksTest-philosophy|5": {
350
+ "acc": 0.18006430868167203,
351
+ "acc_stderr": 0.021823422857744953,
352
+ "acc_norm": 0.18006430868167203,
353
+ "acc_norm_stderr": 0.021823422857744953
354
+ },
355
+ "harness|hendrycksTest-prehistory|5": {
356
+ "acc": 0.25617283950617287,
357
+ "acc_stderr": 0.024288533637726095,
358
+ "acc_norm": 0.25617283950617287,
359
+ "acc_norm_stderr": 0.024288533637726095
360
+ },
361
+ "harness|hendrycksTest-professional_accounting|5": {
362
+ "acc": 0.2801418439716312,
363
+ "acc_stderr": 0.02678917235114023,
364
+ "acc_norm": 0.2801418439716312,
365
+ "acc_norm_stderr": 0.02678917235114023
366
+ },
367
+ "harness|hendrycksTest-professional_law|5": {
368
+ "acc": 0.24837027379400262,
369
+ "acc_stderr": 0.011035212598034503,
370
+ "acc_norm": 0.24837027379400262,
371
+ "acc_norm_stderr": 0.011035212598034503
372
+ },
373
+ "harness|hendrycksTest-professional_medicine|5": {
374
+ "acc": 0.3125,
375
+ "acc_stderr": 0.02815637344037142,
376
+ "acc_norm": 0.3125,
377
+ "acc_norm_stderr": 0.02815637344037142
378
+ },
379
+ "harness|hendrycksTest-professional_psychology|5": {
380
+ "acc": 0.25,
381
+ "acc_stderr": 0.01751781884501444,
382
+ "acc_norm": 0.25,
383
+ "acc_norm_stderr": 0.01751781884501444
384
+ },
385
+ "harness|hendrycksTest-public_relations|5": {
386
+ "acc": 0.18181818181818182,
387
+ "acc_stderr": 0.03694284335337801,
388
+ "acc_norm": 0.18181818181818182,
389
+ "acc_norm_stderr": 0.03694284335337801
390
+ },
391
+ "harness|hendrycksTest-security_studies|5": {
392
+ "acc": 0.31020408163265306,
393
+ "acc_stderr": 0.029613459872484378,
394
+ "acc_norm": 0.31020408163265306,
395
+ "acc_norm_stderr": 0.029613459872484378
396
+ },
397
+ "harness|hendrycksTest-sociology|5": {
398
+ "acc": 0.24875621890547264,
399
+ "acc_stderr": 0.030567675938916707,
400
+ "acc_norm": 0.24875621890547264,
401
+ "acc_norm_stderr": 0.030567675938916707
402
+ },
403
+ "harness|hendrycksTest-us_foreign_policy|5": {
404
+ "acc": 0.25,
405
+ "acc_stderr": 0.04351941398892446,
406
+ "acc_norm": 0.25,
407
+ "acc_norm_stderr": 0.04351941398892446
408
+ },
409
+ "harness|hendrycksTest-virology|5": {
410
+ "acc": 0.19879518072289157,
411
+ "acc_stderr": 0.03106939026078942,
412
+ "acc_norm": 0.19879518072289157,
413
+ "acc_norm_stderr": 0.03106939026078942
414
+ },
415
+ "harness|hendrycksTest-world_religions|5": {
416
+ "acc": 0.29239766081871343,
417
+ "acc_stderr": 0.034886477134579215,
418
+ "acc_norm": 0.29239766081871343,
419
+ "acc_norm_stderr": 0.034886477134579215
420
+ },
421
+ "harness|truthfulqa:mc|0": {
422
+ "mc1": 0.2521419828641371,
423
+ "mc1_stderr": 0.015201522246299965,
424
+ "mc2": 0.41257163824244014,
425
+ "mc2_stderr": 0.015127188811834062
426
+ },
427
+ "harness|winogrande|5": {
428
+ "acc": 0.4996053670086819,
429
+ "acc_stderr": 0.014052481306049512
430
+ },
431
+ "harness|gsm8k|5": {
432
+ "acc": 0.0,
433
+ "acc_stderr": 0.0
434
+ }
435
+ }
436
+
437
  ```
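
The block added above is a raw results dictionary in the style of lm-evaluation-harness / Open LLM Leaderboard output. As a rough illustration only, the sketch below shows one way such a dictionary could be summarised, e.g. by averaging `acc_norm` over the `hendrycksTest` (MMLU) subtasks; the file name `results.json` and the helper `summarise` are hypothetical and not part of this repository.

```python
# Illustrative sketch only: summarise a harness-style results dict like the one above.
# "results.json" and summarise() are hypothetical, not provided by the repository.
import json
from statistics import mean


def summarise(results: dict) -> dict:
    """Collect headline numbers from an lm-evaluation-harness results dict."""
    # Average normalised accuracy over all MMLU (hendrycksTest) subtasks.
    mmlu_accs = [
        v["acc_norm"]
        for k, v in results.items()
        if k.startswith("harness|hendrycksTest-")
    ]
    return {
        "arc_challenge_acc_norm": results["harness|arc:challenge|25"]["acc_norm"],
        "hellaswag_acc_norm": results["harness|hellaswag|10"]["acc_norm"],
        "mmlu_acc_norm_avg": mean(mmlu_accs),
        "truthfulqa_mc2": results["harness|truthfulqa:mc|0"]["mc2"],
        "winogrande_acc": results["harness|winogrande|5"]["acc"],
        "gsm8k_acc": results["harness|gsm8k|5"]["acc"],
    }


if __name__ == "__main__":
    # Assumes the dict above has been saved to results.json.
    with open("results.json") as f:
        results = json.load(f)
    for name, value in summarise(results).items():
        print(f"{name}: {value:.4f}")
```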