lfcc committed on
Commit
ee0b77a
1 Parent(s): b29eac6

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-cased",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "B-Event",
14
+ "1": "B-Time",
15
+ "2": "I-Time",
16
+ "3": "I-Participant",
17
+ "4": "B-Spatial_Relation",
18
+ "5": "O",
19
+ "6": "I-Event",
20
+ "7": "I-Spatial_Relation",
21
+ "8": "B-Participant"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 3072,
25
+ "label2id": {
26
+ "B-Event": 0,
27
+ "B-Participant": 8,
28
+ "B-Spatial_Relation": 4,
29
+ "B-Time": 1,
30
+ "I-Event": 6,
31
+ "I-Participant": 3,
32
+ "I-Spatial_Relation": 7,
33
+ "I-Time": 2,
34
+ "O": 5
35
+ },
36
+ "layer_norm_eps": 1e-12,
37
+ "max_position_embeddings": 512,
38
+ "model_type": "bert",
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 12,
41
+ "pad_token_id": 0,
42
+ "position_embedding_type": "absolute",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.41.0",
45
+ "type_vocab_size": 2,
46
+ "use_cache": true,
47
+ "vocab_size": 28996
48
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529129d17ffbb1a3af4cfead42b38ed3279dd213d0029a45656ca8c55d38dba6
3
+ size 430929740
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e03e5101f423bc6ac1b16de8271418ea416f9c16acbd4fa0bfc39c24431e1797
3
+ size 861973626
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590fa2278dcc8c28730156cc937e39eda77b514751a067f8876cb4783e008930
3
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ca1983fec4f33f3d582e039862ec5fb578d3ed0254dad229c539b1ace1c2c1a
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "BertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
trainer_state.json ADDED
@@ -0,0 +1,863 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5749279856681824,
3
+ "best_model_checkpoint": "models/lusa_en/checkpoint-110",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 110,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.045454545454545456,
13
+ "grad_norm": 4.094762325286865,
14
+ "learning_rate": 1.9990909090909092e-05,
15
+ "loss": 2.1525,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.09090909090909091,
20
+ "grad_norm": 4.10725212097168,
21
+ "learning_rate": 1.9981818181818185e-05,
22
+ "loss": 2.0303,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.13636363636363635,
27
+ "grad_norm": 4.080411434173584,
28
+ "learning_rate": 1.9972727272727275e-05,
29
+ "loss": 1.926,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 0.18181818181818182,
34
+ "grad_norm": 3.544022560119629,
35
+ "learning_rate": 1.9963636363636365e-05,
36
+ "loss": 1.8315,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.22727272727272727,
41
+ "grad_norm": 2.855754852294922,
42
+ "learning_rate": 1.9954545454545455e-05,
43
+ "loss": 1.7865,
44
+ "step": 5
45
+ },
46
+ {
47
+ "epoch": 0.2727272727272727,
48
+ "grad_norm": 2.752898931503296,
49
+ "learning_rate": 1.994545454545455e-05,
50
+ "loss": 1.682,
51
+ "step": 6
52
+ },
53
+ {
54
+ "epoch": 0.3181818181818182,
55
+ "grad_norm": 2.3124074935913086,
56
+ "learning_rate": 1.993636363636364e-05,
57
+ "loss": 1.6458,
58
+ "step": 7
59
+ },
60
+ {
61
+ "epoch": 0.36363636363636365,
62
+ "grad_norm": 2.361560583114624,
63
+ "learning_rate": 1.992727272727273e-05,
64
+ "loss": 1.5782,
65
+ "step": 8
66
+ },
67
+ {
68
+ "epoch": 0.4090909090909091,
69
+ "grad_norm": 2.4446144104003906,
70
+ "learning_rate": 1.991818181818182e-05,
71
+ "loss": 1.5693,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 0.45454545454545453,
76
+ "grad_norm": 2.2963356971740723,
77
+ "learning_rate": 1.9909090909090913e-05,
78
+ "loss": 1.5906,
79
+ "step": 10
80
+ },
81
+ {
82
+ "epoch": 0.5,
83
+ "grad_norm": 1.9563844203948975,
84
+ "learning_rate": 1.9900000000000003e-05,
85
+ "loss": 1.4818,
86
+ "step": 11
87
+ },
88
+ {
89
+ "epoch": 0.5454545454545454,
90
+ "grad_norm": 1.904161810874939,
91
+ "learning_rate": 1.9890909090909093e-05,
92
+ "loss": 1.4469,
93
+ "step": 12
94
+ },
95
+ {
96
+ "epoch": 0.5909090909090909,
97
+ "grad_norm": 1.6757051944732666,
98
+ "learning_rate": 1.9881818181818183e-05,
99
+ "loss": 1.3387,
100
+ "step": 13
101
+ },
102
+ {
103
+ "epoch": 0.6363636363636364,
104
+ "grad_norm": 1.670788049697876,
105
+ "learning_rate": 1.9872727272727276e-05,
106
+ "loss": 1.3976,
107
+ "step": 14
108
+ },
109
+ {
110
+ "epoch": 0.6818181818181818,
111
+ "grad_norm": 1.7997887134552002,
112
+ "learning_rate": 1.9863636363636366e-05,
113
+ "loss": 1.2961,
114
+ "step": 15
115
+ },
116
+ {
117
+ "epoch": 0.7272727272727273,
118
+ "grad_norm": 1.6230303049087524,
119
+ "learning_rate": 1.9854545454545456e-05,
120
+ "loss": 1.2923,
121
+ "step": 16
122
+ },
123
+ {
124
+ "epoch": 0.7727272727272727,
125
+ "grad_norm": 1.810758352279663,
126
+ "learning_rate": 1.9845454545454546e-05,
127
+ "loss": 1.2559,
128
+ "step": 17
129
+ },
130
+ {
131
+ "epoch": 0.8181818181818182,
132
+ "grad_norm": 2.564798593521118,
133
+ "learning_rate": 1.9836363636363636e-05,
134
+ "loss": 1.2239,
135
+ "step": 18
136
+ },
137
+ {
138
+ "epoch": 0.8636363636363636,
139
+ "grad_norm": 1.6320985555648804,
140
+ "learning_rate": 1.982727272727273e-05,
141
+ "loss": 1.1662,
142
+ "step": 19
143
+ },
144
+ {
145
+ "epoch": 0.9090909090909091,
146
+ "grad_norm": 1.5779709815979004,
147
+ "learning_rate": 1.981818181818182e-05,
148
+ "loss": 1.0843,
149
+ "step": 20
150
+ },
151
+ {
152
+ "epoch": 0.9545454545454546,
153
+ "grad_norm": 1.7842929363250732,
154
+ "learning_rate": 1.980909090909091e-05,
155
+ "loss": 1.0595,
156
+ "step": 21
157
+ },
158
+ {
159
+ "epoch": 1.0,
160
+ "grad_norm": 2.1895947456359863,
161
+ "learning_rate": 1.98e-05,
162
+ "loss": 1.2116,
163
+ "step": 22
164
+ },
165
+ {
166
+ "epoch": 1.0,
167
+ "eval_accuracy": 0.7246807246807246,
168
+ "eval_f1": 0.5378067100650976,
169
+ "eval_loss": 0.9455885291099548,
170
+ "eval_precision": 0.5496417604912999,
171
+ "eval_recall": 0.5264705882352941,
172
+ "eval_runtime": 5.3772,
173
+ "eval_samples_per_second": 24.362,
174
+ "eval_steps_per_second": 0.93,
175
+ "step": 22
176
+ },
177
+ {
178
+ "epoch": 1.0454545454545454,
179
+ "grad_norm": 1.5596168041229248,
180
+ "learning_rate": 1.979090909090909e-05,
181
+ "loss": 1.1374,
182
+ "step": 23
183
+ },
184
+ {
185
+ "epoch": 1.0909090909090908,
186
+ "grad_norm": 1.4826650619506836,
187
+ "learning_rate": 1.9781818181818184e-05,
188
+ "loss": 0.9918,
189
+ "step": 24
190
+ },
191
+ {
192
+ "epoch": 1.1363636363636362,
193
+ "grad_norm": 1.6731046438217163,
194
+ "learning_rate": 1.9772727272727274e-05,
195
+ "loss": 0.8705,
196
+ "step": 25
197
+ },
198
+ {
199
+ "epoch": 1.1818181818181819,
200
+ "grad_norm": 1.8360694646835327,
201
+ "learning_rate": 1.9763636363636364e-05,
202
+ "loss": 1.0117,
203
+ "step": 26
204
+ },
205
+ {
206
+ "epoch": 1.2272727272727273,
207
+ "grad_norm": 1.6999843120574951,
208
+ "learning_rate": 1.9754545454545454e-05,
209
+ "loss": 0.9752,
210
+ "step": 27
211
+ },
212
+ {
213
+ "epoch": 1.2727272727272727,
214
+ "grad_norm": 1.629109263420105,
215
+ "learning_rate": 1.9745454545454547e-05,
216
+ "loss": 0.9142,
217
+ "step": 28
218
+ },
219
+ {
220
+ "epoch": 1.3181818181818181,
221
+ "grad_norm": 1.4835275411605835,
222
+ "learning_rate": 1.9736363636363637e-05,
223
+ "loss": 0.8984,
224
+ "step": 29
225
+ },
226
+ {
227
+ "epoch": 1.3636363636363638,
228
+ "grad_norm": 1.3297476768493652,
229
+ "learning_rate": 1.9727272727272728e-05,
230
+ "loss": 0.9224,
231
+ "step": 30
232
+ },
233
+ {
234
+ "epoch": 1.4090909090909092,
235
+ "grad_norm": 1.582480788230896,
236
+ "learning_rate": 1.971818181818182e-05,
237
+ "loss": 0.8557,
238
+ "step": 31
239
+ },
240
+ {
241
+ "epoch": 1.4545454545454546,
242
+ "grad_norm": 1.3994060754776,
243
+ "learning_rate": 1.970909090909091e-05,
244
+ "loss": 0.748,
245
+ "step": 32
246
+ },
247
+ {
248
+ "epoch": 1.5,
249
+ "grad_norm": 2.084137201309204,
250
+ "learning_rate": 1.97e-05,
251
+ "loss": 1.1522,
252
+ "step": 33
253
+ },
254
+ {
255
+ "epoch": 1.5454545454545454,
256
+ "grad_norm": 1.8021726608276367,
257
+ "learning_rate": 1.969090909090909e-05,
258
+ "loss": 0.8038,
259
+ "step": 34
260
+ },
261
+ {
262
+ "epoch": 1.5909090909090908,
263
+ "grad_norm": 1.3661516904830933,
264
+ "learning_rate": 1.9681818181818185e-05,
265
+ "loss": 0.8233,
266
+ "step": 35
267
+ },
268
+ {
269
+ "epoch": 1.6363636363636362,
270
+ "grad_norm": 1.5308856964111328,
271
+ "learning_rate": 1.9672727272727275e-05,
272
+ "loss": 0.8182,
273
+ "step": 36
274
+ },
275
+ {
276
+ "epoch": 1.6818181818181817,
277
+ "grad_norm": 1.5709282159805298,
278
+ "learning_rate": 1.9663636363636365e-05,
279
+ "loss": 0.7915,
280
+ "step": 37
281
+ },
282
+ {
283
+ "epoch": 1.7272727272727273,
284
+ "grad_norm": 1.6001756191253662,
285
+ "learning_rate": 1.9654545454545458e-05,
286
+ "loss": 0.8671,
287
+ "step": 38
288
+ },
289
+ {
290
+ "epoch": 1.7727272727272727,
291
+ "grad_norm": 1.243642807006836,
292
+ "learning_rate": 1.964545454545455e-05,
293
+ "loss": 0.8128,
294
+ "step": 39
295
+ },
296
+ {
297
+ "epoch": 1.8181818181818183,
298
+ "grad_norm": 1.9662197828292847,
299
+ "learning_rate": 1.963636363636364e-05,
300
+ "loss": 0.7816,
301
+ "step": 40
302
+ },
303
+ {
304
+ "epoch": 1.8636363636363638,
305
+ "grad_norm": 1.392984390258789,
306
+ "learning_rate": 1.962727272727273e-05,
307
+ "loss": 0.7299,
308
+ "step": 41
309
+ },
310
+ {
311
+ "epoch": 1.9090909090909092,
312
+ "grad_norm": 1.7500344514846802,
313
+ "learning_rate": 1.9618181818181822e-05,
314
+ "loss": 0.8295,
315
+ "step": 42
316
+ },
317
+ {
318
+ "epoch": 1.9545454545454546,
319
+ "grad_norm": 1.3700010776519775,
320
+ "learning_rate": 1.9609090909090912e-05,
321
+ "loss": 0.7227,
322
+ "step": 43
323
+ },
324
+ {
325
+ "epoch": 2.0,
326
+ "grad_norm": 1.7891302108764648,
327
+ "learning_rate": 1.9600000000000002e-05,
328
+ "loss": 0.8371,
329
+ "step": 44
330
+ },
331
+ {
332
+ "epoch": 2.0,
333
+ "eval_accuracy": 0.7840807840807841,
334
+ "eval_f1": 0.6456470588235295,
335
+ "eval_loss": 0.68104088306427,
336
+ "eval_precision": 0.6208144796380091,
337
+ "eval_recall": 0.6725490196078432,
338
+ "eval_runtime": 5.7284,
339
+ "eval_samples_per_second": 22.869,
340
+ "eval_steps_per_second": 0.873,
341
+ "step": 44
342
+ },
343
+ {
344
+ "epoch": 2.0454545454545454,
345
+ "grad_norm": 1.324338436126709,
346
+ "learning_rate": 1.9590909090909092e-05,
347
+ "loss": 0.6347,
348
+ "step": 45
349
+ },
350
+ {
351
+ "epoch": 2.090909090909091,
352
+ "grad_norm": 1.5182204246520996,
353
+ "learning_rate": 1.9581818181818186e-05,
354
+ "loss": 0.713,
355
+ "step": 46
356
+ },
357
+ {
358
+ "epoch": 2.1363636363636362,
359
+ "grad_norm": 1.2550334930419922,
360
+ "learning_rate": 1.9572727272727276e-05,
361
+ "loss": 0.7751,
362
+ "step": 47
363
+ },
364
+ {
365
+ "epoch": 2.1818181818181817,
366
+ "grad_norm": 1.2773672342300415,
367
+ "learning_rate": 1.9563636363636366e-05,
368
+ "loss": 0.6857,
369
+ "step": 48
370
+ },
371
+ {
372
+ "epoch": 2.227272727272727,
373
+ "grad_norm": 1.2220115661621094,
374
+ "learning_rate": 1.9554545454545456e-05,
375
+ "loss": 0.6481,
376
+ "step": 49
377
+ },
378
+ {
379
+ "epoch": 2.2727272727272725,
380
+ "grad_norm": 1.2490642070770264,
381
+ "learning_rate": 1.9545454545454546e-05,
382
+ "loss": 0.6727,
383
+ "step": 50
384
+ },
385
+ {
386
+ "epoch": 2.3181818181818183,
387
+ "grad_norm": 1.7800368070602417,
388
+ "learning_rate": 1.953636363636364e-05,
389
+ "loss": 0.7261,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 2.3636363636363638,
394
+ "grad_norm": 1.466561198234558,
395
+ "learning_rate": 1.952727272727273e-05,
396
+ "loss": 0.679,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 2.409090909090909,
401
+ "grad_norm": 1.6164387464523315,
402
+ "learning_rate": 1.951818181818182e-05,
403
+ "loss": 0.6693,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 2.4545454545454546,
408
+ "grad_norm": 1.4198397397994995,
409
+ "learning_rate": 1.950909090909091e-05,
410
+ "loss": 0.608,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 2.5,
415
+ "grad_norm": 1.4696757793426514,
416
+ "learning_rate": 1.95e-05,
417
+ "loss": 0.749,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 2.5454545454545454,
422
+ "grad_norm": 1.4631857872009277,
423
+ "learning_rate": 1.9490909090909093e-05,
424
+ "loss": 0.6256,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 2.590909090909091,
429
+ "grad_norm": 1.4575796127319336,
430
+ "learning_rate": 1.9481818181818183e-05,
431
+ "loss": 0.6964,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 2.6363636363636362,
436
+ "grad_norm": 1.2283692359924316,
437
+ "learning_rate": 1.9472727272727273e-05,
438
+ "loss": 0.7201,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 2.6818181818181817,
443
+ "grad_norm": 1.2565014362335205,
444
+ "learning_rate": 1.9463636363636363e-05,
445
+ "loss": 0.6112,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 2.7272727272727275,
450
+ "grad_norm": 1.3832677602767944,
451
+ "learning_rate": 1.9454545454545457e-05,
452
+ "loss": 0.5858,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 2.7727272727272725,
457
+ "grad_norm": 1.679886817932129,
458
+ "learning_rate": 1.9445454545454547e-05,
459
+ "loss": 0.8611,
460
+ "step": 61
461
+ },
462
+ {
463
+ "epoch": 2.8181818181818183,
464
+ "grad_norm": 1.3422367572784424,
465
+ "learning_rate": 1.9436363636363637e-05,
466
+ "loss": 0.6522,
467
+ "step": 62
468
+ },
469
+ {
470
+ "epoch": 2.8636363636363638,
471
+ "grad_norm": 1.2293131351470947,
472
+ "learning_rate": 1.9427272727272727e-05,
473
+ "loss": 0.5756,
474
+ "step": 63
475
+ },
476
+ {
477
+ "epoch": 2.909090909090909,
478
+ "grad_norm": 1.7241827249526978,
479
+ "learning_rate": 1.941818181818182e-05,
480
+ "loss": 0.5771,
481
+ "step": 64
482
+ },
483
+ {
484
+ "epoch": 2.9545454545454546,
485
+ "grad_norm": 1.3407361507415771,
486
+ "learning_rate": 1.940909090909091e-05,
487
+ "loss": 0.6396,
488
+ "step": 65
489
+ },
490
+ {
491
+ "epoch": 3.0,
492
+ "grad_norm": 1.4412184953689575,
493
+ "learning_rate": 1.94e-05,
494
+ "loss": 0.7121,
495
+ "step": 66
496
+ },
497
+ {
498
+ "epoch": 3.0,
499
+ "eval_accuracy": 0.809028809028809,
500
+ "eval_f1": 0.6716697936210131,
501
+ "eval_loss": 0.6103370189666748,
502
+ "eval_precision": 0.6438848920863309,
503
+ "eval_recall": 0.7019607843137254,
504
+ "eval_runtime": 6.4253,
505
+ "eval_samples_per_second": 20.388,
506
+ "eval_steps_per_second": 0.778,
507
+ "step": 66
508
+ },
509
+ {
510
+ "epoch": 3.0454545454545454,
511
+ "grad_norm": 1.5174870491027832,
512
+ "learning_rate": 1.9390909090909094e-05,
513
+ "loss": 0.5993,
514
+ "step": 67
515
+ },
516
+ {
517
+ "epoch": 3.090909090909091,
518
+ "grad_norm": 1.2559189796447754,
519
+ "learning_rate": 1.9381818181818184e-05,
520
+ "loss": 0.599,
521
+ "step": 68
522
+ },
523
+ {
524
+ "epoch": 3.1363636363636362,
525
+ "grad_norm": 1.3573830127716064,
526
+ "learning_rate": 1.9372727272727274e-05,
527
+ "loss": 0.6561,
528
+ "step": 69
529
+ },
530
+ {
531
+ "epoch": 3.1818181818181817,
532
+ "grad_norm": 1.2237664461135864,
533
+ "learning_rate": 1.9363636363636364e-05,
534
+ "loss": 0.5644,
535
+ "step": 70
536
+ },
537
+ {
538
+ "epoch": 3.227272727272727,
539
+ "grad_norm": 1.7508504390716553,
540
+ "learning_rate": 1.9354545454545458e-05,
541
+ "loss": 0.6542,
542
+ "step": 71
543
+ },
544
+ {
545
+ "epoch": 3.2727272727272725,
546
+ "grad_norm": 1.35462486743927,
547
+ "learning_rate": 1.9345454545454548e-05,
548
+ "loss": 0.5616,
549
+ "step": 72
550
+ },
551
+ {
552
+ "epoch": 3.3181818181818183,
553
+ "grad_norm": 1.3244951963424683,
554
+ "learning_rate": 1.9336363636363638e-05,
555
+ "loss": 0.6016,
556
+ "step": 73
557
+ },
558
+ {
559
+ "epoch": 3.3636363636363638,
560
+ "grad_norm": 1.3806241750717163,
561
+ "learning_rate": 1.9327272727272728e-05,
562
+ "loss": 0.5168,
563
+ "step": 74
564
+ },
565
+ {
566
+ "epoch": 3.409090909090909,
567
+ "grad_norm": 1.3947114944458008,
568
+ "learning_rate": 1.931818181818182e-05,
569
+ "loss": 0.5554,
570
+ "step": 75
571
+ },
572
+ {
573
+ "epoch": 3.4545454545454546,
574
+ "grad_norm": 1.4679116010665894,
575
+ "learning_rate": 1.930909090909091e-05,
576
+ "loss": 0.5753,
577
+ "step": 76
578
+ },
579
+ {
580
+ "epoch": 3.5,
581
+ "grad_norm": 1.2916615009307861,
582
+ "learning_rate": 1.93e-05,
583
+ "loss": 0.4835,
584
+ "step": 77
585
+ },
586
+ {
587
+ "epoch": 3.5454545454545454,
588
+ "grad_norm": 1.713321328163147,
589
+ "learning_rate": 1.9290909090909095e-05,
590
+ "loss": 0.5753,
591
+ "step": 78
592
+ },
593
+ {
594
+ "epoch": 3.590909090909091,
595
+ "grad_norm": 1.3171674013137817,
596
+ "learning_rate": 1.9281818181818185e-05,
597
+ "loss": 0.5477,
598
+ "step": 79
599
+ },
600
+ {
601
+ "epoch": 3.6363636363636362,
602
+ "grad_norm": 1.6939257383346558,
603
+ "learning_rate": 1.9272727272727275e-05,
604
+ "loss": 0.7424,
605
+ "step": 80
606
+ },
607
+ {
608
+ "epoch": 3.6818181818181817,
609
+ "grad_norm": 1.6177845001220703,
610
+ "learning_rate": 1.9263636363636365e-05,
611
+ "loss": 0.6225,
612
+ "step": 81
613
+ },
614
+ {
615
+ "epoch": 3.7272727272727275,
616
+ "grad_norm": 1.7323453426361084,
617
+ "learning_rate": 1.9254545454545455e-05,
618
+ "loss": 0.4644,
619
+ "step": 82
620
+ },
621
+ {
622
+ "epoch": 3.7727272727272725,
623
+ "grad_norm": 1.2443021535873413,
624
+ "learning_rate": 1.924545454545455e-05,
625
+ "loss": 0.524,
626
+ "step": 83
627
+ },
628
+ {
629
+ "epoch": 3.8181818181818183,
630
+ "grad_norm": 1.6111009120941162,
631
+ "learning_rate": 1.923636363636364e-05,
632
+ "loss": 0.6301,
633
+ "step": 84
634
+ },
635
+ {
636
+ "epoch": 3.8636363636363638,
637
+ "grad_norm": 1.3090589046478271,
638
+ "learning_rate": 1.922727272727273e-05,
639
+ "loss": 0.6209,
640
+ "step": 85
641
+ },
642
+ {
643
+ "epoch": 3.909090909090909,
644
+ "grad_norm": 1.2143287658691406,
645
+ "learning_rate": 1.921818181818182e-05,
646
+ "loss": 0.523,
647
+ "step": 86
648
+ },
649
+ {
650
+ "epoch": 3.9545454545454546,
651
+ "grad_norm": 1.551576018333435,
652
+ "learning_rate": 1.920909090909091e-05,
653
+ "loss": 0.6025,
654
+ "step": 87
655
+ },
656
+ {
657
+ "epoch": 4.0,
658
+ "grad_norm": 1.606203317642212,
659
+ "learning_rate": 1.9200000000000003e-05,
660
+ "loss": 0.59,
661
+ "step": 88
662
+ },
663
+ {
664
+ "epoch": 4.0,
665
+ "eval_accuracy": 0.8238788238788238,
666
+ "eval_f1": 0.7027790861987753,
667
+ "eval_loss": 0.5813168883323669,
668
+ "eval_precision": 0.6763372620126926,
669
+ "eval_recall": 0.7313725490196078,
670
+ "eval_runtime": 5.433,
671
+ "eval_samples_per_second": 24.112,
672
+ "eval_steps_per_second": 0.92,
673
+ "step": 88
674
+ },
675
+ {
676
+ "epoch": 4.045454545454546,
677
+ "grad_norm": 1.6170856952667236,
678
+ "learning_rate": 1.9190909090909093e-05,
679
+ "loss": 0.5609,
680
+ "step": 89
681
+ },
682
+ {
683
+ "epoch": 4.090909090909091,
684
+ "grad_norm": 1.21828031539917,
685
+ "learning_rate": 1.9181818181818183e-05,
686
+ "loss": 0.5444,
687
+ "step": 90
688
+ },
689
+ {
690
+ "epoch": 4.136363636363637,
691
+ "grad_norm": 1.3539512157440186,
692
+ "learning_rate": 1.9172727272727273e-05,
693
+ "loss": 0.5608,
694
+ "step": 91
695
+ },
696
+ {
697
+ "epoch": 4.181818181818182,
698
+ "grad_norm": 1.2486921548843384,
699
+ "learning_rate": 1.9163636363636363e-05,
700
+ "loss": 0.4976,
701
+ "step": 92
702
+ },
703
+ {
704
+ "epoch": 4.2272727272727275,
705
+ "grad_norm": 1.474204659461975,
706
+ "learning_rate": 1.9154545454545456e-05,
707
+ "loss": 0.4646,
708
+ "step": 93
709
+ },
710
+ {
711
+ "epoch": 4.2727272727272725,
712
+ "grad_norm": 1.4018511772155762,
713
+ "learning_rate": 1.9145454545454546e-05,
714
+ "loss": 0.4138,
715
+ "step": 94
716
+ },
717
+ {
718
+ "epoch": 4.318181818181818,
719
+ "grad_norm": 1.6630454063415527,
720
+ "learning_rate": 1.9136363636363636e-05,
721
+ "loss": 0.6184,
722
+ "step": 95
723
+ },
724
+ {
725
+ "epoch": 4.363636363636363,
726
+ "grad_norm": 1.681337594985962,
727
+ "learning_rate": 1.912727272727273e-05,
728
+ "loss": 0.4985,
729
+ "step": 96
730
+ },
731
+ {
732
+ "epoch": 4.409090909090909,
733
+ "grad_norm": 1.1836535930633545,
734
+ "learning_rate": 1.911818181818182e-05,
735
+ "loss": 0.4481,
736
+ "step": 97
737
+ },
738
+ {
739
+ "epoch": 4.454545454545454,
740
+ "grad_norm": 1.2125182151794434,
741
+ "learning_rate": 1.910909090909091e-05,
742
+ "loss": 0.4427,
743
+ "step": 98
744
+ },
745
+ {
746
+ "epoch": 4.5,
747
+ "grad_norm": 1.1907086372375488,
748
+ "learning_rate": 1.91e-05,
749
+ "loss": 0.4656,
750
+ "step": 99
751
+ },
752
+ {
753
+ "epoch": 4.545454545454545,
754
+ "grad_norm": 1.6791960000991821,
755
+ "learning_rate": 1.9090909090909094e-05,
756
+ "loss": 0.5132,
757
+ "step": 100
758
+ },
759
+ {
760
+ "epoch": 4.590909090909091,
761
+ "grad_norm": 2.0220723152160645,
762
+ "learning_rate": 1.9081818181818184e-05,
763
+ "loss": 0.7157,
764
+ "step": 101
765
+ },
766
+ {
767
+ "epoch": 4.636363636363637,
768
+ "grad_norm": 1.3273769617080688,
769
+ "learning_rate": 1.9072727272727274e-05,
770
+ "loss": 0.5186,
771
+ "step": 102
772
+ },
773
+ {
774
+ "epoch": 4.681818181818182,
775
+ "grad_norm": 1.971346378326416,
776
+ "learning_rate": 1.9063636363636364e-05,
777
+ "loss": 0.4861,
778
+ "step": 103
779
+ },
780
+ {
781
+ "epoch": 4.7272727272727275,
782
+ "grad_norm": 1.4221469163894653,
783
+ "learning_rate": 1.9054545454545457e-05,
784
+ "loss": 0.5415,
785
+ "step": 104
786
+ },
787
+ {
788
+ "epoch": 4.7727272727272725,
789
+ "grad_norm": 1.5409835577011108,
790
+ "learning_rate": 1.9045454545454547e-05,
791
+ "loss": 0.4741,
792
+ "step": 105
793
+ },
794
+ {
795
+ "epoch": 4.818181818181818,
796
+ "grad_norm": 2.085914134979248,
797
+ "learning_rate": 1.9036363636363637e-05,
798
+ "loss": 0.5062,
799
+ "step": 106
800
+ },
801
+ {
802
+ "epoch": 4.863636363636363,
803
+ "grad_norm": 1.571999430656433,
804
+ "learning_rate": 1.902727272727273e-05,
805
+ "loss": 0.6323,
806
+ "step": 107
807
+ },
808
+ {
809
+ "epoch": 4.909090909090909,
810
+ "grad_norm": 1.3681639432907104,
811
+ "learning_rate": 1.901818181818182e-05,
812
+ "loss": 0.5369,
813
+ "step": 108
814
+ },
815
+ {
816
+ "epoch": 4.954545454545455,
817
+ "grad_norm": 1.8829340934753418,
818
+ "learning_rate": 1.900909090909091e-05,
819
+ "loss": 0.4372,
820
+ "step": 109
821
+ },
822
+ {
823
+ "epoch": 5.0,
824
+ "grad_norm": 1.9513062238693237,
825
+ "learning_rate": 1.9e-05,
826
+ "loss": 0.5141,
827
+ "step": 110
828
+ },
829
+ {
830
+ "epoch": 5.0,
831
+ "eval_accuracy": 0.8241758241758241,
832
+ "eval_f1": 0.6876172607879925,
833
+ "eval_loss": 0.5749279856681824,
834
+ "eval_precision": 0.6591726618705036,
835
+ "eval_recall": 0.7186274509803922,
836
+ "eval_runtime": 5.3919,
837
+ "eval_samples_per_second": 24.296,
838
+ "eval_steps_per_second": 0.927,
839
+ "step": 110
840
+ }
841
+ ],
842
+ "logging_steps": 1,
843
+ "max_steps": 2200,
844
+ "num_input_tokens_seen": 0,
845
+ "num_train_epochs": 100,
846
+ "save_steps": 500,
847
+ "stateful_callbacks": {
848
+ "TrainerControl": {
849
+ "args": {
850
+ "should_epoch_stop": false,
851
+ "should_evaluate": false,
852
+ "should_log": false,
853
+ "should_save": true,
854
+ "should_training_stop": false
855
+ },
856
+ "attributes": {}
857
+ }
858
+ },
859
+ "total_flos": 156073447450800.0,
860
+ "train_batch_size": 32,
861
+ "trial_name": null,
862
+ "trial_params": null
863
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edb300b40b2ded07f15b22bd62db3afb05b9dfc30c8651b44697f89bbdc355f9
3
+ size 5112
vocab.txt ADDED
The diff for this file is too large to render. See raw diff