lewtun HF staff committed on
Commit
1a3600d
·
verified ·
1 Parent(s): af351aa

Upload eval_results/HuggingFaceH4/mistral-7b-ift/v48.21/eval_gsm8k.json with huggingface_hub

Browse files
eval_results/HuggingFaceH4/mistral-7b-ift/v48.21/eval_gsm8k.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "gsm8k": {
4
+ "exact_match,get-answer": 0.002274450341167551,
5
+ "exact_match_stderr,get-answer": 0.001312157814867437,
6
+ "alias": "gsm8k"
7
+ }
8
+ },
9
+ "configs": {
10
+ "gsm8k": {
11
+ "task": "gsm8k",
12
+ "group": [
13
+ "math_word_problems"
14
+ ],
15
+ "dataset_path": "gsm8k",
16
+ "dataset_name": "main",
17
+ "training_split": "train",
18
+ "test_split": "test",
19
+ "fewshot_split": "train",
20
+ "doc_to_text": "Question: {{question}}\nAnswer:",
21
+ "doc_to_target": "{{answer}}",
22
+ "description": "",
23
+ "target_delimiter": " ",
24
+ "fewshot_delimiter": "\n\n",
25
+ "num_fewshot": 5,
26
+ "metric_list": [
27
+ {
28
+ "metric": "exact_match",
29
+ "aggregation": "mean",
30
+ "higher_is_better": true,
31
+ "ignore_case": true,
32
+ "ignore_punctuation": false,
33
+ "regexes_to_ignore": [
34
+ ",",
35
+ "\\$",
36
+ "(?s).*#### "
37
+ ]
38
+ }
39
+ ],
40
+ "output_type": "generate_until",
41
+ "generation_kwargs": {
42
+ "until": [
43
+ "\n\n",
44
+ "Question:"
45
+ ],
46
+ "do_sample": false,
47
+ "temperature": 0.0
48
+ },
49
+ "repeats": 1,
50
+ "filter_list": [
51
+ {
52
+ "name": "get-answer",
53
+ "filter": [
54
+ {
55
+ "function": "regex",
56
+ "regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
57
+ },
58
+ {
59
+ "function": "take_first"
60
+ }
61
+ ]
62
+ }
63
+ ],
64
+ "should_decontaminate": false,
65
+ "metadata": {
66
+ "version": 2.0
67
+ }
68
+ }
69
+ },
70
+ "versions": {
71
+ "gsm8k": 2.0
72
+ },
73
+ "n-shot": {
74
+ "gsm8k": 5
75
+ },
76
+ "config": {
77
+ "model": "hf",
78
+ "model_args": "pretrained=HuggingFaceH4/mistral-7b-ift,revision=v48.21,dtype=bfloat16",
79
+ "batch_size": "auto",
80
+ "batch_sizes": [],
81
+ "device": null,
82
+ "use_cache": null,
83
+ "limit": null,
84
+ "bootstrap_iters": 100000,
85
+ "gen_kwargs": null
86
+ },
87
+ "git_hash": "8a6546c"
88
+ }