zhonghhh commited on
Commit
39a71b9
·
verified ·
1 Parent(s): 282da65

Upload 10 files

Browse files
config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.3.1",
4
+ "transformers": "4.48.0",
5
+ "pytorch": "2.4.0+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a37f668d9813f9ee05d3e3ea2f5bbae68028ad25b178c25408d4742f4ab3e555
3
+ size 410665792
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 256,
51
+ "model_max_length": 512,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
trainer_state.json ADDED
@@ -0,0 +1,1005 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 12.0,
5
+ "eval_steps": 500,
6
+ "global_step": 90492,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 0.18677596747875214,
14
+ "learning_rate": 1.3320367751060821e-05,
15
+ "loss": 1.6931,
16
+ "step": 7541
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_AFQMC_loss": 0.02504170872271061,
21
+ "eval_AFQMC_runtime": 99.0724,
22
+ "eval_AFQMC_samples_per_second": 43.564,
23
+ "eval_AFQMC_steps_per_second": 0.343,
24
+ "eval_emb_eval_pearson_cosine": 0.49435896960848713,
25
+ "eval_emb_eval_spearman_cosine": 0.5280909639875639,
26
+ "step": 7541
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
+ "eval_ATEC_loss": 0.01953265070915222,
31
+ "eval_ATEC_runtime": 6.6017,
32
+ "eval_ATEC_samples_per_second": 3029.521,
33
+ "eval_ATEC_steps_per_second": 23.782,
34
+ "step": 7541
35
+ },
36
+ {
37
+ "epoch": 1.0,
38
+ "eval_BQ_loss": 0.01803545467555523,
39
+ "eval_BQ_runtime": 3.4373,
40
+ "eval_BQ_samples_per_second": 2909.235,
41
+ "eval_BQ_steps_per_second": 22.983,
42
+ "step": 7541
43
+ },
44
+ {
45
+ "epoch": 1.0,
46
+ "eval_Cmnli_loss": 0.020238544791936874,
47
+ "eval_Cmnli_runtime": 3.9735,
48
+ "eval_Cmnli_samples_per_second": 2092.612,
49
+ "eval_Cmnli_steps_per_second": 16.358,
50
+ "step": 7541
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_LCQMC_loss": 0.03518352285027504,
55
+ "eval_LCQMC_runtime": 2.3275,
56
+ "eval_LCQMC_samples_per_second": 3781.698,
57
+ "eval_LCQMC_steps_per_second": 29.645,
58
+ "step": 7541
59
+ },
60
+ {
61
+ "epoch": 1.0,
62
+ "eval_Ocnli_loss": 0.025195566937327385,
63
+ "eval_Ocnli_runtime": 0.6266,
64
+ "eval_Ocnli_samples_per_second": 2947.795,
65
+ "eval_Ocnli_steps_per_second": 23.94,
66
+ "step": 7541
67
+ },
68
+ {
69
+ "epoch": 1.0,
70
+ "eval_PAWSX_loss": 0.067341148853302,
71
+ "eval_PAWSX_runtime": 0.9538,
72
+ "eval_PAWSX_samples_per_second": 2096.982,
73
+ "eval_PAWSX_steps_per_second": 16.776,
74
+ "step": 7541
75
+ },
76
+ {
77
+ "epoch": 1.0,
78
+ "eval_QBQTC_loss": 8.280766487121582,
79
+ "eval_QBQTC_runtime": 7.6679,
80
+ "eval_QBQTC_samples_per_second": 2608.269,
81
+ "eval_QBQTC_steps_per_second": 20.475,
82
+ "step": 7541
83
+ },
84
+ {
85
+ "epoch": 1.0,
86
+ "eval_STS-B_loss": 8.6417875289917,
87
+ "eval_STS-B_runtime": 0.5653,
88
+ "eval_STS-B_samples_per_second": 2579.365,
89
+ "eval_STS-B_steps_per_second": 21.229,
90
+ "step": 7541
91
+ },
92
+ {
93
+ "epoch": 2.0,
94
+ "grad_norm": 0.29912641644477844,
95
+ "learning_rate": 1.92609255130006e-05,
96
+ "loss": 1.5717,
97
+ "step": 15082
98
+ },
99
+ {
100
+ "epoch": 2.0,
101
+ "eval_AFQMC_loss": 0.023414788767695427,
102
+ "eval_AFQMC_runtime": 99.0838,
103
+ "eval_AFQMC_samples_per_second": 43.559,
104
+ "eval_AFQMC_steps_per_second": 0.343,
105
+ "eval_emb_eval_pearson_cosine": 0.541482089845575,
106
+ "eval_emb_eval_spearman_cosine": 0.584542245914602,
107
+ "step": 15082
108
+ },
109
+ {
110
+ "epoch": 2.0,
111
+ "eval_ATEC_loss": 0.01733938232064247,
112
+ "eval_ATEC_runtime": 6.5234,
113
+ "eval_ATEC_samples_per_second": 3065.879,
114
+ "eval_ATEC_steps_per_second": 24.067,
115
+ "step": 15082
116
+ },
117
+ {
118
+ "epoch": 2.0,
119
+ "eval_BQ_loss": 0.015580276027321815,
120
+ "eval_BQ_runtime": 3.419,
121
+ "eval_BQ_samples_per_second": 2924.795,
122
+ "eval_BQ_steps_per_second": 23.106,
123
+ "step": 15082
124
+ },
125
+ {
126
+ "epoch": 2.0,
127
+ "eval_Cmnli_loss": 0.016321830451488495,
128
+ "eval_Cmnli_runtime": 3.7867,
129
+ "eval_Cmnli_samples_per_second": 2195.824,
130
+ "eval_Cmnli_steps_per_second": 17.165,
131
+ "step": 15082
132
+ },
133
+ {
134
+ "epoch": 2.0,
135
+ "eval_LCQMC_loss": 0.02696206048130989,
136
+ "eval_LCQMC_runtime": 2.3672,
137
+ "eval_LCQMC_samples_per_second": 3718.264,
138
+ "eval_LCQMC_steps_per_second": 29.148,
139
+ "step": 15082
140
+ },
141
+ {
142
+ "epoch": 2.0,
143
+ "eval_Ocnli_loss": 0.018722666427493095,
144
+ "eval_Ocnli_runtime": 0.6105,
145
+ "eval_Ocnli_samples_per_second": 3025.526,
146
+ "eval_Ocnli_steps_per_second": 24.571,
147
+ "step": 15082
148
+ },
149
+ {
150
+ "epoch": 2.0,
151
+ "eval_PAWSX_loss": 0.06007164344191551,
152
+ "eval_PAWSX_runtime": 0.9605,
153
+ "eval_PAWSX_samples_per_second": 2082.25,
154
+ "eval_PAWSX_steps_per_second": 16.658,
155
+ "step": 15082
156
+ },
157
+ {
158
+ "epoch": 2.0,
159
+ "eval_QBQTC_loss": 8.381814002990723,
160
+ "eval_QBQTC_runtime": 7.644,
161
+ "eval_QBQTC_samples_per_second": 2616.427,
162
+ "eval_QBQTC_steps_per_second": 20.539,
163
+ "step": 15082
164
+ },
165
+ {
166
+ "epoch": 2.0,
167
+ "eval_STS-B_loss": 9.617568969726562,
168
+ "eval_STS-B_runtime": 0.5589,
169
+ "eval_STS-B_samples_per_second": 2608.657,
170
+ "eval_STS-B_steps_per_second": 21.47,
171
+ "step": 15082
172
+ },
173
+ {
174
+ "epoch": 3.0,
175
+ "grad_norm": 0.2338990569114685,
176
+ "learning_rate": 1.7779829671031307e-05,
177
+ "loss": 1.5301,
178
+ "step": 22623
179
+ },
180
+ {
181
+ "epoch": 3.0,
182
+ "eval_AFQMC_loss": 0.02241475135087967,
183
+ "eval_AFQMC_runtime": 99.9714,
184
+ "eval_AFQMC_samples_per_second": 43.172,
185
+ "eval_AFQMC_steps_per_second": 0.34,
186
+ "eval_emb_eval_pearson_cosine": 0.5913082581080403,
187
+ "eval_emb_eval_spearman_cosine": 0.6383284889220768,
188
+ "step": 22623
189
+ },
190
+ {
191
+ "epoch": 3.0,
192
+ "eval_ATEC_loss": 0.016517719253897667,
193
+ "eval_ATEC_runtime": 6.5842,
194
+ "eval_ATEC_samples_per_second": 3037.594,
195
+ "eval_ATEC_steps_per_second": 23.845,
196
+ "step": 22623
197
+ },
198
+ {
199
+ "epoch": 3.0,
200
+ "eval_BQ_loss": 0.014284521341323853,
201
+ "eval_BQ_runtime": 3.406,
202
+ "eval_BQ_samples_per_second": 2936.005,
203
+ "eval_BQ_steps_per_second": 23.194,
204
+ "step": 22623
205
+ },
206
+ {
207
+ "epoch": 3.0,
208
+ "eval_Cmnli_loss": 0.01529396791011095,
209
+ "eval_Cmnli_runtime": 3.7388,
210
+ "eval_Cmnli_samples_per_second": 2223.987,
211
+ "eval_Cmnli_steps_per_second": 17.385,
212
+ "step": 22623
213
+ },
214
+ {
215
+ "epoch": 3.0,
216
+ "eval_LCQMC_loss": 0.020633986219763756,
217
+ "eval_LCQMC_runtime": 2.3364,
218
+ "eval_LCQMC_samples_per_second": 3767.314,
219
+ "eval_LCQMC_steps_per_second": 29.532,
220
+ "step": 22623
221
+ },
222
+ {
223
+ "epoch": 3.0,
224
+ "eval_Ocnli_loss": 0.016242269426584244,
225
+ "eval_Ocnli_runtime": 0.6209,
226
+ "eval_Ocnli_samples_per_second": 2974.536,
227
+ "eval_Ocnli_steps_per_second": 24.157,
228
+ "step": 22623
229
+ },
230
+ {
231
+ "epoch": 3.0,
232
+ "eval_PAWSX_loss": 0.05045855790376663,
233
+ "eval_PAWSX_runtime": 0.9593,
234
+ "eval_PAWSX_samples_per_second": 2084.846,
235
+ "eval_PAWSX_steps_per_second": 16.679,
236
+ "step": 22623
237
+ },
238
+ {
239
+ "epoch": 3.0,
240
+ "eval_QBQTC_loss": 8.840312957763672,
241
+ "eval_QBQTC_runtime": 7.6443,
242
+ "eval_QBQTC_samples_per_second": 2616.334,
243
+ "eval_QBQTC_steps_per_second": 20.538,
244
+ "step": 22623
245
+ },
246
+ {
247
+ "epoch": 3.0,
248
+ "eval_STS-B_loss": 9.837035179138184,
249
+ "eval_STS-B_runtime": 0.5658,
250
+ "eval_STS-B_samples_per_second": 2577.011,
251
+ "eval_STS-B_steps_per_second": 21.21,
252
+ "step": 22623
253
+ },
254
+ {
255
+ "epoch": 4.0,
256
+ "grad_norm": 0.1766250729560852,
257
+ "learning_rate": 1.629912674479141e-05,
258
+ "loss": 1.4768,
259
+ "step": 30164
260
+ },
261
+ {
262
+ "epoch": 4.0,
263
+ "eval_AFQMC_loss": 0.021405402570962906,
264
+ "eval_AFQMC_runtime": 100.3018,
265
+ "eval_AFQMC_samples_per_second": 43.03,
266
+ "eval_AFQMC_steps_per_second": 0.339,
267
+ "eval_emb_eval_pearson_cosine": 0.6125367634857398,
268
+ "eval_emb_eval_spearman_cosine": 0.6570670709310811,
269
+ "step": 30164
270
+ },
271
+ {
272
+ "epoch": 4.0,
273
+ "eval_ATEC_loss": 0.015135602094233036,
274
+ "eval_ATEC_runtime": 6.5473,
275
+ "eval_ATEC_samples_per_second": 3054.678,
276
+ "eval_ATEC_steps_per_second": 23.979,
277
+ "step": 30164
278
+ },
279
+ {
280
+ "epoch": 4.0,
281
+ "eval_BQ_loss": 0.013757712207734585,
282
+ "eval_BQ_runtime": 3.4539,
283
+ "eval_BQ_samples_per_second": 2895.245,
284
+ "eval_BQ_steps_per_second": 22.872,
285
+ "step": 30164
286
+ },
287
+ {
288
+ "epoch": 4.0,
289
+ "eval_Cmnli_loss": 0.015052303671836853,
290
+ "eval_Cmnli_runtime": 3.7534,
291
+ "eval_Cmnli_samples_per_second": 2215.324,
292
+ "eval_Cmnli_steps_per_second": 17.318,
293
+ "step": 30164
294
+ },
295
+ {
296
+ "epoch": 4.0,
297
+ "eval_LCQMC_loss": 0.019115839153528214,
298
+ "eval_LCQMC_runtime": 2.3344,
299
+ "eval_LCQMC_samples_per_second": 3770.582,
300
+ "eval_LCQMC_steps_per_second": 29.558,
301
+ "step": 30164
302
+ },
303
+ {
304
+ "epoch": 4.0,
305
+ "eval_Ocnli_loss": 0.015967663377523422,
306
+ "eval_Ocnli_runtime": 0.603,
307
+ "eval_Ocnli_samples_per_second": 3063.145,
308
+ "eval_Ocnli_steps_per_second": 24.877,
309
+ "step": 30164
310
+ },
311
+ {
312
+ "epoch": 4.0,
313
+ "eval_PAWSX_loss": 0.04721539840102196,
314
+ "eval_PAWSX_runtime": 0.9557,
315
+ "eval_PAWSX_samples_per_second": 2092.706,
316
+ "eval_PAWSX_steps_per_second": 16.742,
317
+ "step": 30164
318
+ },
319
+ {
320
+ "epoch": 4.0,
321
+ "eval_QBQTC_loss": 9.168432235717773,
322
+ "eval_QBQTC_runtime": 7.6741,
323
+ "eval_QBQTC_samples_per_second": 2606.178,
324
+ "eval_QBQTC_steps_per_second": 20.458,
325
+ "step": 30164
326
+ },
327
+ {
328
+ "epoch": 4.0,
329
+ "eval_STS-B_loss": 9.83575439453125,
330
+ "eval_STS-B_runtime": 0.5632,
331
+ "eval_STS-B_samples_per_second": 2588.593,
332
+ "eval_STS-B_steps_per_second": 21.305,
333
+ "step": 30164
334
+ },
335
+ {
336
+ "epoch": 5.0,
337
+ "grad_norm": 0.13791993260383606,
338
+ "learning_rate": 1.481822736068682e-05,
339
+ "loss": 1.4141,
340
+ "step": 37705
341
+ },
342
+ {
343
+ "epoch": 5.0,
344
+ "eval_AFQMC_loss": 0.0204468946903944,
345
+ "eval_AFQMC_runtime": 99.1315,
346
+ "eval_AFQMC_samples_per_second": 43.538,
347
+ "eval_AFQMC_steps_per_second": 0.343,
348
+ "eval_emb_eval_pearson_cosine": 0.6315631932372178,
349
+ "eval_emb_eval_spearman_cosine": 0.6690982617504025,
350
+ "step": 37705
351
+ },
352
+ {
353
+ "epoch": 5.0,
354
+ "eval_ATEC_loss": 0.014196612872183323,
355
+ "eval_ATEC_runtime": 6.5521,
356
+ "eval_ATEC_samples_per_second": 3052.435,
357
+ "eval_ATEC_steps_per_second": 23.962,
358
+ "step": 37705
359
+ },
360
+ {
361
+ "epoch": 5.0,
362
+ "eval_BQ_loss": 0.013593867421150208,
363
+ "eval_BQ_runtime": 3.4128,
364
+ "eval_BQ_samples_per_second": 2930.177,
365
+ "eval_BQ_steps_per_second": 23.148,
366
+ "step": 37705
367
+ },
368
+ {
369
+ "epoch": 5.0,
370
+ "eval_Cmnli_loss": 0.015099190175533295,
371
+ "eval_Cmnli_runtime": 3.8007,
372
+ "eval_Cmnli_samples_per_second": 2187.75,
373
+ "eval_Cmnli_steps_per_second": 17.102,
374
+ "step": 37705
375
+ },
376
+ {
377
+ "epoch": 5.0,
378
+ "eval_LCQMC_loss": 0.017175855115056038,
379
+ "eval_LCQMC_runtime": 2.4095,
380
+ "eval_LCQMC_samples_per_second": 3653.013,
381
+ "eval_LCQMC_steps_per_second": 28.636,
382
+ "step": 37705
383
+ },
384
+ {
385
+ "epoch": 5.0,
386
+ "eval_Ocnli_loss": 0.015738315880298615,
387
+ "eval_Ocnli_runtime": 0.619,
388
+ "eval_Ocnli_samples_per_second": 2983.626,
389
+ "eval_Ocnli_steps_per_second": 24.231,
390
+ "step": 37705
391
+ },
392
+ {
393
+ "epoch": 5.0,
394
+ "eval_PAWSX_loss": 0.046284567564725876,
395
+ "eval_PAWSX_runtime": 0.959,
396
+ "eval_PAWSX_samples_per_second": 2085.505,
397
+ "eval_PAWSX_steps_per_second": 16.684,
398
+ "step": 37705
399
+ },
400
+ {
401
+ "epoch": 5.0,
402
+ "eval_QBQTC_loss": 9.86181640625,
403
+ "eval_QBQTC_runtime": 7.9334,
404
+ "eval_QBQTC_samples_per_second": 2520.974,
405
+ "eval_QBQTC_steps_per_second": 19.79,
406
+ "step": 37705
407
+ },
408
+ {
409
+ "epoch": 5.0,
410
+ "eval_STS-B_loss": 9.911561965942383,
411
+ "eval_STS-B_runtime": 0.5701,
412
+ "eval_STS-B_samples_per_second": 2557.501,
413
+ "eval_STS-B_steps_per_second": 21.049,
414
+ "step": 37705
415
+ },
416
+ {
417
+ "epoch": 6.0,
418
+ "grad_norm": 0.17065556347370148,
419
+ "learning_rate": 1.3337327976582224e-05,
420
+ "loss": 1.3461,
421
+ "step": 45246
422
+ },
423
+ {
424
+ "epoch": 6.0,
425
+ "eval_AFQMC_loss": 0.02002647891640663,
426
+ "eval_AFQMC_runtime": 98.4869,
427
+ "eval_AFQMC_samples_per_second": 43.823,
428
+ "eval_AFQMC_steps_per_second": 0.345,
429
+ "eval_emb_eval_pearson_cosine": 0.6436085641934666,
430
+ "eval_emb_eval_spearman_cosine": 0.6715343325157711,
431
+ "step": 45246
432
+ },
433
+ {
434
+ "epoch": 6.0,
435
+ "eval_ATEC_loss": 0.014070386998355389,
436
+ "eval_ATEC_runtime": 6.5822,
437
+ "eval_ATEC_samples_per_second": 3038.48,
438
+ "eval_ATEC_steps_per_second": 23.852,
439
+ "step": 45246
440
+ },
441
+ {
442
+ "epoch": 6.0,
443
+ "eval_BQ_loss": 0.013585967943072319,
444
+ "eval_BQ_runtime": 3.6468,
445
+ "eval_BQ_samples_per_second": 2742.113,
446
+ "eval_BQ_steps_per_second": 21.663,
447
+ "step": 45246
448
+ },
449
+ {
450
+ "epoch": 6.0,
451
+ "eval_Cmnli_loss": 0.01478442084044218,
452
+ "eval_Cmnli_runtime": 3.7572,
453
+ "eval_Cmnli_samples_per_second": 2213.061,
454
+ "eval_Cmnli_steps_per_second": 17.3,
455
+ "step": 45246
456
+ },
457
+ {
458
+ "epoch": 6.0,
459
+ "eval_LCQMC_loss": 0.016025548800826073,
460
+ "eval_LCQMC_runtime": 2.3232,
461
+ "eval_LCQMC_samples_per_second": 3788.767,
462
+ "eval_LCQMC_steps_per_second": 29.701,
463
+ "step": 45246
464
+ },
465
+ {
466
+ "epoch": 6.0,
467
+ "eval_Ocnli_loss": 0.015464934520423412,
468
+ "eval_Ocnli_runtime": 0.6071,
469
+ "eval_Ocnli_samples_per_second": 3042.383,
470
+ "eval_Ocnli_steps_per_second": 24.708,
471
+ "step": 45246
472
+ },
473
+ {
474
+ "epoch": 6.0,
475
+ "eval_PAWSX_loss": 0.04284413903951645,
476
+ "eval_PAWSX_runtime": 0.9891,
477
+ "eval_PAWSX_samples_per_second": 2021.991,
478
+ "eval_PAWSX_steps_per_second": 16.176,
479
+ "step": 45246
480
+ },
481
+ {
482
+ "epoch": 6.0,
483
+ "eval_QBQTC_loss": 10.840497016906738,
484
+ "eval_QBQTC_runtime": 7.7281,
485
+ "eval_QBQTC_samples_per_second": 2587.958,
486
+ "eval_QBQTC_steps_per_second": 20.315,
487
+ "step": 45246
488
+ },
489
+ {
490
+ "epoch": 6.0,
491
+ "eval_STS-B_loss": 9.943366050720215,
492
+ "eval_STS-B_runtime": 0.5591,
493
+ "eval_STS-B_samples_per_second": 2607.892,
494
+ "eval_STS-B_steps_per_second": 21.464,
495
+ "step": 45246
496
+ },
497
+ {
498
+ "epoch": 7.0,
499
+ "grad_norm": 0.13601745665073395,
500
+ "learning_rate": 1.185642859247763e-05,
501
+ "loss": 1.2856,
502
+ "step": 52787
503
+ },
504
+ {
505
+ "epoch": 7.0,
506
+ "eval_AFQMC_loss": 0.019535699859261513,
507
+ "eval_AFQMC_runtime": 99.2494,
508
+ "eval_AFQMC_samples_per_second": 43.486,
509
+ "eval_AFQMC_steps_per_second": 0.343,
510
+ "eval_emb_eval_pearson_cosine": 0.6473204770905479,
511
+ "eval_emb_eval_spearman_cosine": 0.6723246242283224,
512
+ "step": 52787
513
+ },
514
+ {
515
+ "epoch": 7.0,
516
+ "eval_ATEC_loss": 0.013532574288547039,
517
+ "eval_ATEC_runtime": 6.5765,
518
+ "eval_ATEC_samples_per_second": 3041.133,
519
+ "eval_ATEC_steps_per_second": 23.873,
520
+ "step": 52787
521
+ },
522
+ {
523
+ "epoch": 7.0,
524
+ "eval_BQ_loss": 0.013674370013177395,
525
+ "eval_BQ_runtime": 3.4566,
526
+ "eval_BQ_samples_per_second": 2892.978,
527
+ "eval_BQ_steps_per_second": 22.855,
528
+ "step": 52787
529
+ },
530
+ {
531
+ "epoch": 7.0,
532
+ "eval_Cmnli_loss": 0.014647725969552994,
533
+ "eval_Cmnli_runtime": 3.7546,
534
+ "eval_Cmnli_samples_per_second": 2214.61,
535
+ "eval_Cmnli_steps_per_second": 17.312,
536
+ "step": 52787
537
+ },
538
+ {
539
+ "epoch": 7.0,
540
+ "eval_LCQMC_loss": 0.0150056267157197,
541
+ "eval_LCQMC_runtime": 2.3629,
542
+ "eval_LCQMC_samples_per_second": 3725.146,
543
+ "eval_LCQMC_steps_per_second": 29.202,
544
+ "step": 52787
545
+ },
546
+ {
547
+ "epoch": 7.0,
548
+ "eval_Ocnli_loss": 0.015559999272227287,
549
+ "eval_Ocnli_runtime": 0.6231,
550
+ "eval_Ocnli_samples_per_second": 2964.212,
551
+ "eval_Ocnli_steps_per_second": 24.073,
552
+ "step": 52787
553
+ },
554
+ {
555
+ "epoch": 7.0,
556
+ "eval_PAWSX_loss": 0.04360537603497505,
557
+ "eval_PAWSX_runtime": 0.9535,
558
+ "eval_PAWSX_samples_per_second": 2097.49,
559
+ "eval_PAWSX_steps_per_second": 16.78,
560
+ "step": 52787
561
+ },
562
+ {
563
+ "epoch": 7.0,
564
+ "eval_QBQTC_loss": 11.554633140563965,
565
+ "eval_QBQTC_runtime": 7.7436,
566
+ "eval_QBQTC_samples_per_second": 2582.791,
567
+ "eval_QBQTC_steps_per_second": 20.275,
568
+ "step": 52787
569
+ },
570
+ {
571
+ "epoch": 7.0,
572
+ "eval_STS-B_loss": 10.235774993896484,
573
+ "eval_STS-B_runtime": 0.5543,
574
+ "eval_STS-B_samples_per_second": 2630.224,
575
+ "eval_STS-B_steps_per_second": 21.648,
576
+ "step": 52787
577
+ },
578
+ {
579
+ "epoch": 8.0,
580
+ "grad_norm": 0.11353704333305359,
581
+ "learning_rate": 1.0375332750508335e-05,
582
+ "loss": 1.2182,
583
+ "step": 60328
584
+ },
585
+ {
586
+ "epoch": 8.0,
587
+ "eval_AFQMC_loss": 0.019351305440068245,
588
+ "eval_AFQMC_runtime": 99.553,
589
+ "eval_AFQMC_samples_per_second": 43.354,
590
+ "eval_AFQMC_steps_per_second": 0.342,
591
+ "eval_emb_eval_pearson_cosine": 0.6494222678001456,
592
+ "eval_emb_eval_spearman_cosine": 0.6703225826686093,
593
+ "step": 60328
594
+ },
595
+ {
596
+ "epoch": 8.0,
597
+ "eval_ATEC_loss": 0.013181576505303383,
598
+ "eval_ATEC_runtime": 6.522,
599
+ "eval_ATEC_samples_per_second": 3066.526,
600
+ "eval_ATEC_steps_per_second": 24.072,
601
+ "step": 60328
602
+ },
603
+ {
604
+ "epoch": 8.0,
605
+ "eval_BQ_loss": 0.013695988804101944,
606
+ "eval_BQ_runtime": 3.4134,
607
+ "eval_BQ_samples_per_second": 2929.642,
608
+ "eval_BQ_steps_per_second": 23.144,
609
+ "step": 60328
610
+ },
611
+ {
612
+ "epoch": 8.0,
613
+ "eval_Cmnli_loss": 0.014423331245779991,
614
+ "eval_Cmnli_runtime": 3.7623,
615
+ "eval_Cmnli_samples_per_second": 2210.105,
616
+ "eval_Cmnli_steps_per_second": 17.277,
617
+ "step": 60328
618
+ },
619
+ {
620
+ "epoch": 8.0,
621
+ "eval_LCQMC_loss": 0.015052268281579018,
622
+ "eval_LCQMC_runtime": 2.3257,
623
+ "eval_LCQMC_samples_per_second": 3784.705,
624
+ "eval_LCQMC_steps_per_second": 29.669,
625
+ "step": 60328
626
+ },
627
+ {
628
+ "epoch": 8.0,
629
+ "eval_Ocnli_loss": 0.015219747088849545,
630
+ "eval_Ocnli_runtime": 0.6121,
631
+ "eval_Ocnli_samples_per_second": 3017.401,
632
+ "eval_Ocnli_steps_per_second": 24.505,
633
+ "step": 60328
634
+ },
635
+ {
636
+ "epoch": 8.0,
637
+ "eval_PAWSX_loss": 0.042021822184324265,
638
+ "eval_PAWSX_runtime": 0.9526,
639
+ "eval_PAWSX_samples_per_second": 2099.597,
640
+ "eval_PAWSX_steps_per_second": 16.797,
641
+ "step": 60328
642
+ },
643
+ {
644
+ "epoch": 8.0,
645
+ "eval_QBQTC_loss": 12.265329360961914,
646
+ "eval_QBQTC_runtime": 7.6635,
647
+ "eval_QBQTC_samples_per_second": 2609.768,
648
+ "eval_QBQTC_steps_per_second": 20.487,
649
+ "step": 60328
650
+ },
651
+ {
652
+ "epoch": 8.0,
653
+ "eval_STS-B_loss": 10.596503257751465,
654
+ "eval_STS-B_runtime": 0.5656,
655
+ "eval_STS-B_samples_per_second": 2577.705,
656
+ "eval_STS-B_steps_per_second": 21.216,
657
+ "step": 60328
658
+ },
659
+ {
660
+ "epoch": 9.0,
661
+ "grad_norm": 0.10075096040964127,
662
+ "learning_rate": 8.89443336640374e-06,
663
+ "loss": 1.1546,
664
+ "step": 67869
665
+ },
666
+ {
667
+ "epoch": 9.0,
668
+ "eval_AFQMC_loss": 0.018916338682174683,
669
+ "eval_AFQMC_runtime": 99.1097,
670
+ "eval_AFQMC_samples_per_second": 43.548,
671
+ "eval_AFQMC_steps_per_second": 0.343,
672
+ "eval_emb_eval_pearson_cosine": 0.6461004194477242,
673
+ "eval_emb_eval_spearman_cosine": 0.6620616757187626,
674
+ "step": 67869
675
+ },
676
+ {
677
+ "epoch": 9.0,
678
+ "eval_ATEC_loss": 0.01316931750625372,
679
+ "eval_ATEC_runtime": 6.5342,
680
+ "eval_ATEC_samples_per_second": 3060.808,
681
+ "eval_ATEC_steps_per_second": 24.027,
682
+ "step": 67869
683
+ },
684
+ {
685
+ "epoch": 9.0,
686
+ "eval_BQ_loss": 0.013916433788836002,
687
+ "eval_BQ_runtime": 3.4228,
688
+ "eval_BQ_samples_per_second": 2921.626,
689
+ "eval_BQ_steps_per_second": 23.081,
690
+ "step": 67869
691
+ },
692
+ {
693
+ "epoch": 9.0,
694
+ "eval_Cmnli_loss": 0.014616083353757858,
695
+ "eval_Cmnli_runtime": 3.7166,
696
+ "eval_Cmnli_samples_per_second": 2237.257,
697
+ "eval_Cmnli_steps_per_second": 17.489,
698
+ "step": 67869
699
+ },
700
+ {
701
+ "epoch": 9.0,
702
+ "eval_LCQMC_loss": 0.014678360894322395,
703
+ "eval_LCQMC_runtime": 2.3376,
704
+ "eval_LCQMC_samples_per_second": 3765.326,
705
+ "eval_LCQMC_steps_per_second": 29.517,
706
+ "step": 67869
707
+ },
708
+ {
709
+ "epoch": 9.0,
710
+ "eval_Ocnli_loss": 0.015178242698311806,
711
+ "eval_Ocnli_runtime": 0.6061,
712
+ "eval_Ocnli_samples_per_second": 3047.588,
713
+ "eval_Ocnli_steps_per_second": 24.75,
714
+ "step": 67869
715
+ },
716
+ {
717
+ "epoch": 9.0,
718
+ "eval_PAWSX_loss": 0.04116755351424217,
719
+ "eval_PAWSX_runtime": 0.9482,
720
+ "eval_PAWSX_samples_per_second": 2109.261,
721
+ "eval_PAWSX_steps_per_second": 16.874,
722
+ "step": 67869
723
+ },
724
+ {
725
+ "epoch": 9.0,
726
+ "eval_QBQTC_loss": 12.928996086120605,
727
+ "eval_QBQTC_runtime": 7.7135,
728
+ "eval_QBQTC_samples_per_second": 2592.855,
729
+ "eval_QBQTC_steps_per_second": 20.354,
730
+ "step": 67869
731
+ },
732
+ {
733
+ "epoch": 9.0,
734
+ "eval_STS-B_loss": 10.769329071044922,
735
+ "eval_STS-B_runtime": 0.557,
736
+ "eval_STS-B_samples_per_second": 2617.49,
737
+ "eval_STS-B_steps_per_second": 21.543,
738
+ "step": 67869
739
+ },
740
+ {
741
+ "epoch": 10.0,
742
+ "grad_norm": 116.47360229492188,
743
+ "learning_rate": 7.413533982299147e-06,
744
+ "loss": 1.091,
745
+ "step": 75410
746
+ },
747
+ {
748
+ "epoch": 10.0,
749
+ "eval_AFQMC_loss": 0.01876525580883026,
750
+ "eval_AFQMC_runtime": 106.9735,
751
+ "eval_AFQMC_samples_per_second": 40.346,
752
+ "eval_AFQMC_steps_per_second": 0.318,
753
+ "eval_emb_eval_pearson_cosine": 0.6425398472325708,
754
+ "eval_emb_eval_spearman_cosine": 0.6571917650236837,
755
+ "step": 75410
756
+ },
757
+ {
758
+ "epoch": 10.0,
759
+ "eval_ATEC_loss": 0.012936480343341827,
760
+ "eval_ATEC_runtime": 6.5457,
761
+ "eval_ATEC_samples_per_second": 3055.455,
762
+ "eval_ATEC_steps_per_second": 23.985,
763
+ "step": 75410
764
+ },
765
+ {
766
+ "epoch": 10.0,
767
+ "eval_BQ_loss": 0.014092645607888699,
768
+ "eval_BQ_runtime": 3.4559,
769
+ "eval_BQ_samples_per_second": 2893.566,
770
+ "eval_BQ_steps_per_second": 22.859,
771
+ "step": 75410
772
+ },
773
+ {
774
+ "epoch": 10.0,
775
+ "eval_Cmnli_loss": 0.01457986794412136,
776
+ "eval_Cmnli_runtime": 3.7514,
777
+ "eval_Cmnli_samples_per_second": 2216.51,
778
+ "eval_Cmnli_steps_per_second": 17.327,
779
+ "step": 75410
780
+ },
781
+ {
782
+ "epoch": 10.0,
783
+ "eval_LCQMC_loss": 0.014272717759013176,
784
+ "eval_LCQMC_runtime": 2.3604,
785
+ "eval_LCQMC_samples_per_second": 3729.02,
786
+ "eval_LCQMC_steps_per_second": 29.232,
787
+ "step": 75410
788
+ },
789
+ {
790
+ "epoch": 10.0,
791
+ "eval_Ocnli_loss": 0.015037407167255878,
792
+ "eval_Ocnli_runtime": 0.6116,
793
+ "eval_Ocnli_samples_per_second": 3020.096,
794
+ "eval_Ocnli_steps_per_second": 24.527,
795
+ "step": 75410
796
+ },
797
+ {
798
+ "epoch": 10.0,
799
+ "eval_PAWSX_loss": 0.04113338142633438,
800
+ "eval_PAWSX_runtime": 0.9594,
801
+ "eval_PAWSX_samples_per_second": 2084.624,
802
+ "eval_PAWSX_steps_per_second": 16.677,
803
+ "step": 75410
804
+ },
805
+ {
806
+ "epoch": 10.0,
807
+ "eval_QBQTC_loss": 13.890397071838379,
808
+ "eval_QBQTC_runtime": 7.674,
809
+ "eval_QBQTC_samples_per_second": 2606.201,
810
+ "eval_QBQTC_steps_per_second": 20.459,
811
+ "step": 75410
812
+ },
813
+ {
814
+ "epoch": 10.0,
815
+ "eval_STS-B_loss": 10.956206321716309,
816
+ "eval_STS-B_runtime": 0.5649,
817
+ "eval_STS-B_samples_per_second": 2580.888,
818
+ "eval_STS-B_steps_per_second": 21.242,
819
+ "step": 75410
820
+ },
821
+ {
822
+ "epoch": 11.0,
823
+ "grad_norm": 0.15993598103523254,
824
+ "learning_rate": 5.933027513923951e-06,
825
+ "loss": 1.0267,
826
+ "step": 82951
827
+ },
828
+ {
829
+ "epoch": 11.0,
830
+ "eval_AFQMC_loss": 0.01858402043581009,
831
+ "eval_AFQMC_runtime": 99.2423,
832
+ "eval_AFQMC_samples_per_second": 43.49,
833
+ "eval_AFQMC_steps_per_second": 0.343,
834
+ "eval_emb_eval_pearson_cosine": 0.635671675332461,
835
+ "eval_emb_eval_spearman_cosine": 0.6462278753331322,
836
+ "step": 82951
837
+ },
838
+ {
839
+ "epoch": 11.0,
840
+ "eval_ATEC_loss": 0.012706396169960499,
841
+ "eval_ATEC_runtime": 6.5392,
842
+ "eval_ATEC_samples_per_second": 3058.477,
843
+ "eval_ATEC_steps_per_second": 24.009,
844
+ "step": 82951
845
+ },
846
+ {
847
+ "epoch": 11.0,
848
+ "eval_BQ_loss": 0.014143843203783035,
849
+ "eval_BQ_runtime": 3.3932,
850
+ "eval_BQ_samples_per_second": 2947.098,
851
+ "eval_BQ_steps_per_second": 23.282,
852
+ "step": 82951
853
+ },
854
+ {
855
+ "epoch": 11.0,
856
+ "eval_Cmnli_loss": 0.014601893723011017,
857
+ "eval_Cmnli_runtime": 3.7177,
858
+ "eval_Cmnli_samples_per_second": 2236.627,
859
+ "eval_Cmnli_steps_per_second": 17.484,
860
+ "step": 82951
861
+ },
862
+ {
863
+ "epoch": 11.0,
864
+ "eval_LCQMC_loss": 0.01407212857156992,
865
+ "eval_LCQMC_runtime": 2.3166,
866
+ "eval_LCQMC_samples_per_second": 3799.506,
867
+ "eval_LCQMC_steps_per_second": 29.785,
868
+ "step": 82951
869
+ },
870
+ {
871
+ "epoch": 11.0,
872
+ "eval_Ocnli_loss": 0.015255914069712162,
873
+ "eval_Ocnli_runtime": 0.6096,
874
+ "eval_Ocnli_samples_per_second": 3029.719,
875
+ "eval_Ocnli_steps_per_second": 24.605,
876
+ "step": 82951
877
+ },
878
+ {
879
+ "epoch": 11.0,
880
+ "eval_PAWSX_loss": 0.04009222611784935,
881
+ "eval_PAWSX_runtime": 0.9569,
882
+ "eval_PAWSX_samples_per_second": 2090.135,
883
+ "eval_PAWSX_steps_per_second": 16.721,
884
+ "step": 82951
885
+ },
886
+ {
887
+ "epoch": 11.0,
888
+ "eval_QBQTC_loss": 14.71971607208252,
889
+ "eval_QBQTC_runtime": 7.651,
890
+ "eval_QBQTC_samples_per_second": 2614.025,
891
+ "eval_QBQTC_steps_per_second": 20.52,
892
+ "step": 82951
893
+ },
894
+ {
895
+ "epoch": 11.0,
896
+ "eval_STS-B_loss": 11.138467788696289,
897
+ "eval_STS-B_runtime": 0.5605,
898
+ "eval_STS-B_samples_per_second": 2601.08,
899
+ "eval_STS-B_steps_per_second": 21.408,
900
+ "step": 82951
901
+ },
902
+ {
903
+ "epoch": 12.0,
904
+ "grad_norm": 0.22925728559494019,
905
+ "learning_rate": 4.451735214089958e-06,
906
+ "loss": 0.9688,
907
+ "step": 90492
908
+ },
909
+ {
910
+ "epoch": 12.0,
911
+ "eval_AFQMC_loss": 0.018353162333369255,
912
+ "eval_AFQMC_runtime": 98.1243,
913
+ "eval_AFQMC_samples_per_second": 43.985,
914
+ "eval_AFQMC_steps_per_second": 0.346,
915
+ "eval_emb_eval_pearson_cosine": 0.6340788346277473,
916
+ "eval_emb_eval_spearman_cosine": 0.6444001384260496,
917
+ "step": 90492
918
+ },
919
+ {
920
+ "epoch": 12.0,
921
+ "eval_ATEC_loss": 0.012524303048849106,
922
+ "eval_ATEC_runtime": 6.5509,
923
+ "eval_ATEC_samples_per_second": 3052.999,
924
+ "eval_ATEC_steps_per_second": 23.966,
925
+ "step": 90492
926
+ },
927
+ {
928
+ "epoch": 12.0,
929
+ "eval_BQ_loss": 0.014260655269026756,
930
+ "eval_BQ_runtime": 3.4399,
931
+ "eval_BQ_samples_per_second": 2907.038,
932
+ "eval_BQ_steps_per_second": 22.966,
933
+ "step": 90492
934
+ },
935
+ {
936
+ "epoch": 12.0,
937
+ "eval_Cmnli_loss": 0.014628582634031773,
938
+ "eval_Cmnli_runtime": 3.7252,
939
+ "eval_Cmnli_samples_per_second": 2232.102,
940
+ "eval_Cmnli_steps_per_second": 17.449,
941
+ "step": 90492
942
+ },
943
+ {
944
+ "epoch": 12.0,
945
+ "eval_LCQMC_loss": 0.013899387791752815,
946
+ "eval_LCQMC_runtime": 2.349,
947
+ "eval_LCQMC_samples_per_second": 3747.087,
948
+ "eval_LCQMC_steps_per_second": 29.374,
949
+ "step": 90492
950
+ },
951
+ {
952
+ "epoch": 12.0,
953
+ "eval_Ocnli_loss": 0.015396999195218086,
954
+ "eval_Ocnli_runtime": 0.6119,
955
+ "eval_Ocnli_samples_per_second": 3018.253,
956
+ "eval_Ocnli_steps_per_second": 24.512,
957
+ "step": 90492
958
+ },
959
+ {
960
+ "epoch": 12.0,
961
+ "eval_PAWSX_loss": 0.04036952182650566,
962
+ "eval_PAWSX_runtime": 0.9649,
963
+ "eval_PAWSX_samples_per_second": 2072.698,
964
+ "eval_PAWSX_steps_per_second": 16.582,
965
+ "step": 90492
966
+ },
967
+ {
968
+ "epoch": 12.0,
969
+ "eval_QBQTC_loss": 15.251015663146973,
970
+ "eval_QBQTC_runtime": 7.662,
971
+ "eval_QBQTC_samples_per_second": 2610.3,
972
+ "eval_QBQTC_steps_per_second": 20.491,
973
+ "step": 90492
974
+ },
975
+ {
976
+ "epoch": 12.0,
977
+ "eval_STS-B_loss": 11.509617805480957,
978
+ "eval_STS-B_runtime": 0.5625,
979
+ "eval_STS-B_samples_per_second": 2592.013,
980
+ "eval_STS-B_steps_per_second": 21.333,
981
+ "step": 90492
982
+ }
983
+ ],
984
+ "logging_steps": 500,
985
+ "max_steps": 113115,
986
+ "num_input_tokens_seen": 0,
987
+ "num_train_epochs": 15,
988
+ "save_steps": 500,
989
+ "stateful_callbacks": {
990
+ "TrainerControl": {
991
+ "args": {
992
+ "should_epoch_stop": false,
993
+ "should_evaluate": false,
994
+ "should_log": false,
995
+ "should_save": true,
996
+ "should_training_stop": false
997
+ },
998
+ "attributes": {}
999
+ }
1000
+ },
1001
+ "total_flos": 0.0,
1002
+ "train_batch_size": 128,
1003
+ "trial_name": null,
1004
+ "trial_params": null
1005
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff