Rawodo committed on
Commit
d888fd8
·
1 Parent(s): 0ca233c

Training in progress, step 4800, checkpoint

Browse files
checkpoint-4800/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "zwaarcontrast/lilt-xlm-roberta-base",
3
+ "architectures": [
4
+ "LiltForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "channel_shrink_ratio": 4,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "Caption",
16
+ "1": "Footnote",
17
+ "2": "Formula",
18
+ "3": "List-item",
19
+ "4": "Page-footer",
20
+ "5": "Page-header",
21
+ "6": "Picture",
22
+ "7": "Section-header",
23
+ "8": "Table",
24
+ "9": "Text",
25
+ "10": "Title"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 3072,
29
+ "label2id": {
30
+ "Caption": 0,
31
+ "Footnote": 1,
32
+ "Formula": 2,
33
+ "List-item": 3,
34
+ "Page-footer": 4,
35
+ "Page-header": 5,
36
+ "Picture": 6,
37
+ "Section-header": 7,
38
+ "Table": 8,
39
+ "Text": 9,
40
+ "Title": 10
41
+ },
42
+ "layer_norm_eps": 1e-05,
43
+ "max_2d_position_embeddings": 1024,
44
+ "max_position_embeddings": 514,
45
+ "model_type": "lilt",
46
+ "num_attention_heads": 12,
47
+ "num_hidden_layers": 12,
48
+ "output_past": true,
49
+ "pad_token_id": 1,
50
+ "position_embedding_type": "absolute",
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.35.2",
53
+ "type_vocab_size": 1,
54
+ "use_cache": true,
55
+ "vocab_size": 250002
56
+ }
checkpoint-4800/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf96cb4d9683ac2c85fb139dcca7e845f7d430270d692f1249b75fe65d2f4ead
3
+ size 1134332228
checkpoint-4800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afd11bec2714a4855c94e2d253e646dd2527636516e1238a0010a5ab6cb28c76
3
+ size 2265924830
checkpoint-4800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb6b52c485d17dd3f00ee52a0a46691eb7bb3d1ec3fe1e444c2b09a5a0ca334
3
+ size 14244
checkpoint-4800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0148bc05146219c5b3f8d75be4b5ab2a0c1e0383a17b7bfd5898a859caa54041
3
+ size 1064
checkpoint-4800/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
checkpoint-4800/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59925fcb90c92b894cb93e51bb9b4a6105c5c249fe54ce1c704420ac39b81af
3
+ size 17082756
checkpoint-4800/tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 512,
50
+ "pad_token": "<pad>",
51
+ "sep_token": "</s>",
52
+ "tokenizer_class": "XLMRobertaTokenizer",
53
+ "unk_token": "<unk>"
54
+ }
checkpoint-4800/trainer_state.json ADDED
@@ -0,0 +1,649 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8966457253905286,
3
+ "best_model_checkpoint": "DocLayNet/lilt-xlm-roberta-base-finetuned-DocLayNet-large_paragraphs_ml512-v1\\checkpoint-3300",
4
+ "epoch": 0.2548041193332626,
5
+ "eval_steps": 100,
6
+ "global_step": 4800,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "eval_accuracy": 0.7151480521438768,
14
+ "eval_f1": 0.7151480521438768,
15
+ "eval_loss": 0.8854337930679321,
16
+ "eval_precision": 0.7151480521438768,
17
+ "eval_recall": 0.7151480521438768,
18
+ "eval_runtime": 178.7293,
19
+ "eval_samples_per_second": 89.291,
20
+ "eval_steps_per_second": 5.584,
21
+ "step": 100
22
+ },
23
+ {
24
+ "epoch": 0.01,
25
+ "eval_accuracy": 0.7820361348614215,
26
+ "eval_f1": 0.7820361348614215,
27
+ "eval_loss": 0.6539337038993835,
28
+ "eval_precision": 0.7820361348614215,
29
+ "eval_recall": 0.7820361348614215,
30
+ "eval_runtime": 180.7707,
31
+ "eval_samples_per_second": 88.283,
32
+ "eval_steps_per_second": 5.521,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.02,
37
+ "eval_accuracy": 0.7994587607645203,
38
+ "eval_f1": 0.7994587607645203,
39
+ "eval_loss": 0.6309346556663513,
40
+ "eval_precision": 0.7994587607645203,
41
+ "eval_recall": 0.7994587607645203,
42
+ "eval_runtime": 181.7301,
43
+ "eval_samples_per_second": 87.817,
44
+ "eval_steps_per_second": 5.492,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.02,
49
+ "eval_accuracy": 0.7618061280264906,
50
+ "eval_f1": 0.7618061280264905,
51
+ "eval_loss": 0.6511958837509155,
52
+ "eval_precision": 0.7618061280264906,
53
+ "eval_recall": 0.7618061280264906,
54
+ "eval_runtime": 180.2148,
55
+ "eval_samples_per_second": 88.555,
56
+ "eval_steps_per_second": 5.538,
57
+ "step": 400
58
+ },
59
+ {
60
+ "epoch": 0.03,
61
+ "learning_rate": 1.9473404820044594e-05,
62
+ "loss": 0.7733,
63
+ "step": 500
64
+ },
65
+ {
66
+ "epoch": 0.03,
67
+ "eval_accuracy": 0.8603838870222617,
68
+ "eval_f1": 0.8603838870222617,
69
+ "eval_loss": 0.45408895611763,
70
+ "eval_precision": 0.8603838870222617,
71
+ "eval_recall": 0.8603838870222617,
72
+ "eval_runtime": 179.2793,
73
+ "eval_samples_per_second": 89.018,
74
+ "eval_steps_per_second": 5.567,
75
+ "step": 500
76
+ },
77
+ {
78
+ "epoch": 0.03,
79
+ "eval_accuracy": 0.8186640239589626,
80
+ "eval_f1": 0.8186640239589626,
81
+ "eval_loss": 0.588881254196167,
82
+ "eval_precision": 0.8186640239589626,
83
+ "eval_recall": 0.8186640239589626,
84
+ "eval_runtime": 178.9676,
85
+ "eval_samples_per_second": 89.173,
86
+ "eval_steps_per_second": 5.576,
87
+ "step": 600
88
+ },
89
+ {
90
+ "epoch": 0.04,
91
+ "eval_accuracy": 0.8374099877288685,
92
+ "eval_f1": 0.8374099877288685,
93
+ "eval_loss": 0.5701665878295898,
94
+ "eval_precision": 0.8374099877288685,
95
+ "eval_recall": 0.8374099877288685,
96
+ "eval_runtime": 179.5722,
97
+ "eval_samples_per_second": 88.872,
98
+ "eval_steps_per_second": 5.558,
99
+ "step": 700
100
+ },
101
+ {
102
+ "epoch": 0.04,
103
+ "eval_accuracy": 0.8645771022868747,
104
+ "eval_f1": 0.8645771022868747,
105
+ "eval_loss": 0.45831653475761414,
106
+ "eval_precision": 0.8645771022868747,
107
+ "eval_recall": 0.8645771022868747,
108
+ "eval_runtime": 178.4787,
109
+ "eval_samples_per_second": 89.417,
110
+ "eval_steps_per_second": 5.592,
111
+ "step": 800
112
+ },
113
+ {
114
+ "epoch": 0.05,
115
+ "eval_accuracy": 0.8684816433991331,
116
+ "eval_f1": 0.8684816433991331,
117
+ "eval_loss": 0.4440499544143677,
118
+ "eval_precision": 0.8684816433991331,
119
+ "eval_recall": 0.8684816433991331,
120
+ "eval_runtime": 179.4524,
121
+ "eval_samples_per_second": 88.932,
122
+ "eval_steps_per_second": 5.561,
123
+ "step": 900
124
+ },
125
+ {
126
+ "epoch": 0.05,
127
+ "learning_rate": 1.894256290476696e-05,
128
+ "loss": 0.4221,
129
+ "step": 1000
130
+ },
131
+ {
132
+ "epoch": 0.05,
133
+ "eval_accuracy": 0.8310532043326915,
134
+ "eval_f1": 0.8310532043326915,
135
+ "eval_loss": 0.49495837092399597,
136
+ "eval_precision": 0.8310532043326915,
137
+ "eval_recall": 0.8310532043326915,
138
+ "eval_runtime": 177.3364,
139
+ "eval_samples_per_second": 89.993,
140
+ "eval_steps_per_second": 5.628,
141
+ "step": 1000
142
+ },
143
+ {
144
+ "epoch": 0.06,
145
+ "eval_accuracy": 0.8420863105955319,
146
+ "eval_f1": 0.8420863105955319,
147
+ "eval_loss": 0.563329815864563,
148
+ "eval_precision": 0.8420863105955319,
149
+ "eval_recall": 0.8420863105955319,
150
+ "eval_runtime": 177.8626,
151
+ "eval_samples_per_second": 89.727,
152
+ "eval_steps_per_second": 5.611,
153
+ "step": 1100
154
+ },
155
+ {
156
+ "epoch": 0.06,
157
+ "eval_accuracy": 0.850556069746056,
158
+ "eval_f1": 0.850556069746056,
159
+ "eval_loss": 0.4411052465438843,
160
+ "eval_precision": 0.850556069746056,
161
+ "eval_recall": 0.850556069746056,
162
+ "eval_runtime": 177.3605,
163
+ "eval_samples_per_second": 89.981,
164
+ "eval_steps_per_second": 5.627,
165
+ "step": 1200
166
+ },
167
+ {
168
+ "epoch": 0.07,
169
+ "eval_accuracy": 0.8742392543278803,
170
+ "eval_f1": 0.8742392543278803,
171
+ "eval_loss": 0.4354064464569092,
172
+ "eval_precision": 0.8742392543278803,
173
+ "eval_recall": 0.8742392543278803,
174
+ "eval_runtime": 177.2446,
175
+ "eval_samples_per_second": 90.039,
176
+ "eval_steps_per_second": 5.631,
177
+ "step": 1300
178
+ },
179
+ {
180
+ "epoch": 0.07,
181
+ "eval_accuracy": 0.8777866727774323,
182
+ "eval_f1": 0.8777866727774322,
183
+ "eval_loss": 0.3980981111526489,
184
+ "eval_precision": 0.8777866727774323,
185
+ "eval_recall": 0.8777866727774323,
186
+ "eval_runtime": 177.4209,
187
+ "eval_samples_per_second": 89.95,
188
+ "eval_steps_per_second": 5.625,
189
+ "step": 1400
190
+ },
191
+ {
192
+ "epoch": 0.08,
193
+ "learning_rate": 1.8412782673319888e-05,
194
+ "loss": 0.3623,
195
+ "step": 1500
196
+ },
197
+ {
198
+ "epoch": 0.08,
199
+ "eval_accuracy": 0.8690242714449679,
200
+ "eval_f1": 0.8690242714449679,
201
+ "eval_loss": 0.42456910014152527,
202
+ "eval_precision": 0.8690242714449679,
203
+ "eval_recall": 0.8690242714449679,
204
+ "eval_runtime": 177.8504,
205
+ "eval_samples_per_second": 89.733,
206
+ "eval_steps_per_second": 5.611,
207
+ "step": 1500
208
+ },
209
+ {
210
+ "epoch": 0.08,
211
+ "eval_accuracy": 0.8831891450582655,
212
+ "eval_f1": 0.8831891450582655,
213
+ "eval_loss": 0.4083055853843689,
214
+ "eval_precision": 0.8831891450582655,
215
+ "eval_recall": 0.8831891450582655,
216
+ "eval_runtime": 177.1688,
217
+ "eval_samples_per_second": 90.078,
218
+ "eval_steps_per_second": 5.633,
219
+ "step": 1600
220
+ },
221
+ {
222
+ "epoch": 0.09,
223
+ "eval_accuracy": 0.8346978395070913,
224
+ "eval_f1": 0.8346978395070913,
225
+ "eval_loss": 0.5854523181915283,
226
+ "eval_precision": 0.8346978395070913,
227
+ "eval_recall": 0.8346978395070913,
228
+ "eval_runtime": 177.5366,
229
+ "eval_samples_per_second": 89.891,
230
+ "eval_steps_per_second": 5.621,
231
+ "step": 1700
232
+ },
233
+ {
234
+ "epoch": 0.1,
235
+ "eval_accuracy": 0.8867871558850343,
236
+ "eval_f1": 0.8867871558850343,
237
+ "eval_loss": 0.39148494601249695,
238
+ "eval_precision": 0.8867871558850343,
239
+ "eval_recall": 0.8867871558850343,
240
+ "eval_runtime": 177.8685,
241
+ "eval_samples_per_second": 89.724,
242
+ "eval_steps_per_second": 5.611,
243
+ "step": 1800
244
+ },
245
+ {
246
+ "epoch": 0.1,
247
+ "eval_accuracy": 0.8668835194835213,
248
+ "eval_f1": 0.8668835194835214,
249
+ "eval_loss": 0.44608378410339355,
250
+ "eval_precision": 0.8668835194835213,
251
+ "eval_recall": 0.8668835194835213,
252
+ "eval_runtime": 177.5721,
253
+ "eval_samples_per_second": 89.873,
254
+ "eval_steps_per_second": 5.62,
255
+ "step": 1900
256
+ },
257
+ {
258
+ "epoch": 0.11,
259
+ "learning_rate": 1.7881940758042255e-05,
260
+ "loss": 0.3762,
261
+ "step": 2000
262
+ },
263
+ {
264
+ "epoch": 0.11,
265
+ "eval_accuracy": 0.8843170574679805,
266
+ "eval_f1": 0.8843170574679804,
267
+ "eval_loss": 0.3827630281448364,
268
+ "eval_precision": 0.8843170574679805,
269
+ "eval_recall": 0.8843170574679805,
270
+ "eval_runtime": 177.7736,
271
+ "eval_samples_per_second": 89.771,
272
+ "eval_steps_per_second": 5.614,
273
+ "step": 2000
274
+ },
275
+ {
276
+ "epoch": 0.11,
277
+ "eval_accuracy": 0.8833111619680236,
278
+ "eval_f1": 0.8833111619680236,
279
+ "eval_loss": 0.41039273142814636,
280
+ "eval_precision": 0.8833111619680236,
281
+ "eval_recall": 0.8833111619680236,
282
+ "eval_runtime": 177.7324,
283
+ "eval_samples_per_second": 89.792,
284
+ "eval_steps_per_second": 5.615,
285
+ "step": 2100
286
+ },
287
+ {
288
+ "epoch": 0.12,
289
+ "eval_accuracy": 0.8817051019932405,
290
+ "eval_f1": 0.8817051019932405,
291
+ "eval_loss": 0.41124606132507324,
292
+ "eval_precision": 0.8817051019932405,
293
+ "eval_recall": 0.8817051019932405,
294
+ "eval_runtime": 177.9802,
295
+ "eval_samples_per_second": 89.667,
296
+ "eval_steps_per_second": 5.607,
297
+ "step": 2200
298
+ },
299
+ {
300
+ "epoch": 0.12,
301
+ "eval_accuracy": 0.8908226419736186,
302
+ "eval_f1": 0.8908226419736186,
303
+ "eval_loss": 0.3715788424015045,
304
+ "eval_precision": 0.8908226419736186,
305
+ "eval_recall": 0.8908226419736186,
306
+ "eval_runtime": 181.2362,
307
+ "eval_samples_per_second": 88.056,
308
+ "eval_steps_per_second": 5.507,
309
+ "step": 2300
310
+ },
311
+ {
312
+ "epoch": 0.13,
313
+ "eval_accuracy": 0.8952380668950267,
314
+ "eval_f1": 0.8952380668950267,
315
+ "eval_loss": 0.38714146614074707,
316
+ "eval_precision": 0.8952380668950267,
317
+ "eval_recall": 0.8952380668950267,
318
+ "eval_runtime": 185.8786,
319
+ "eval_samples_per_second": 85.857,
320
+ "eval_steps_per_second": 5.369,
321
+ "step": 2400
322
+ },
323
+ {
324
+ "epoch": 0.13,
325
+ "learning_rate": 1.7351098842764628e-05,
326
+ "loss": 0.3049,
327
+ "step": 2500
328
+ },
329
+ {
330
+ "epoch": 0.13,
331
+ "eval_accuracy": 0.8558087489100319,
332
+ "eval_f1": 0.8558087489100319,
333
+ "eval_loss": 0.5354240536689758,
334
+ "eval_precision": 0.8558087489100319,
335
+ "eval_recall": 0.8558087489100319,
336
+ "eval_runtime": 179.1088,
337
+ "eval_samples_per_second": 89.102,
338
+ "eval_steps_per_second": 5.572,
339
+ "step": 2500
340
+ },
341
+ {
342
+ "epoch": 0.14,
343
+ "eval_accuracy": 0.8584157443477899,
344
+ "eval_f1": 0.8584157443477899,
345
+ "eval_loss": 0.5085635781288147,
346
+ "eval_precision": 0.8584157443477899,
347
+ "eval_recall": 0.8584157443477899,
348
+ "eval_runtime": 178.0144,
349
+ "eval_samples_per_second": 89.65,
350
+ "eval_steps_per_second": 5.606,
351
+ "step": 2600
352
+ },
353
+ {
354
+ "epoch": 0.14,
355
+ "eval_accuracy": 0.8527087257962596,
356
+ "eval_f1": 0.8527087257962596,
357
+ "eval_loss": 0.5149290561676025,
358
+ "eval_precision": 0.8527087257962596,
359
+ "eval_recall": 0.8527087257962596,
360
+ "eval_runtime": 178.6366,
361
+ "eval_samples_per_second": 89.338,
362
+ "eval_steps_per_second": 5.587,
363
+ "step": 2700
364
+ },
365
+ {
366
+ "epoch": 0.15,
367
+ "eval_accuracy": 0.8276535949852042,
368
+ "eval_f1": 0.8276535949852042,
369
+ "eval_loss": 0.7321985363960266,
370
+ "eval_precision": 0.8276535949852042,
371
+ "eval_recall": 0.8276535949852042,
372
+ "eval_runtime": 180.6283,
373
+ "eval_samples_per_second": 88.353,
374
+ "eval_steps_per_second": 5.525,
375
+ "step": 2800
376
+ },
377
+ {
378
+ "epoch": 0.15,
379
+ "eval_accuracy": 0.8446089854045952,
380
+ "eval_f1": 0.8446089854045952,
381
+ "eval_loss": 0.5994013547897339,
382
+ "eval_precision": 0.8446089854045952,
383
+ "eval_recall": 0.8446089854045952,
384
+ "eval_runtime": 181.3155,
385
+ "eval_samples_per_second": 88.018,
386
+ "eval_steps_per_second": 5.504,
387
+ "step": 2900
388
+ },
389
+ {
390
+ "epoch": 0.16,
391
+ "learning_rate": 1.6820256927486995e-05,
392
+ "loss": 0.272,
393
+ "step": 3000
394
+ },
395
+ {
396
+ "epoch": 0.16,
397
+ "eval_accuracy": 0.8696780043192002,
398
+ "eval_f1": 0.8696780043192002,
399
+ "eval_loss": 0.5098685622215271,
400
+ "eval_precision": 0.8696780043192002,
401
+ "eval_recall": 0.8696780043192002,
402
+ "eval_runtime": 182.3242,
403
+ "eval_samples_per_second": 87.531,
404
+ "eval_steps_per_second": 5.474,
405
+ "step": 3000
406
+ },
407
+ {
408
+ "epoch": 0.16,
409
+ "eval_accuracy": 0.8661514180249728,
410
+ "eval_f1": 0.8661514180249729,
411
+ "eval_loss": 0.46759557723999023,
412
+ "eval_precision": 0.8661514180249728,
413
+ "eval_recall": 0.8661514180249728,
414
+ "eval_runtime": 185.214,
415
+ "eval_samples_per_second": 86.165,
416
+ "eval_steps_per_second": 5.388,
417
+ "step": 3100
418
+ },
419
+ {
420
+ "epoch": 0.17,
421
+ "eval_accuracy": 0.8626764161153585,
422
+ "eval_f1": 0.8626764161153585,
423
+ "eval_loss": 0.5452213287353516,
424
+ "eval_precision": 0.8626764161153585,
425
+ "eval_recall": 0.8626764161153585,
426
+ "eval_runtime": 181.9867,
427
+ "eval_samples_per_second": 87.693,
428
+ "eval_steps_per_second": 5.484,
429
+ "step": 3200
430
+ },
431
+ {
432
+ "epoch": 0.18,
433
+ "eval_accuracy": 0.8966457253905286,
434
+ "eval_f1": 0.8966457253905286,
435
+ "eval_loss": 0.38049089908599854,
436
+ "eval_precision": 0.8966457253905286,
437
+ "eval_recall": 0.8966457253905286,
438
+ "eval_runtime": 183.5682,
439
+ "eval_samples_per_second": 86.938,
440
+ "eval_steps_per_second": 5.437,
441
+ "step": 3300
442
+ },
443
+ {
444
+ "epoch": 0.18,
445
+ "eval_accuracy": 0.887366488204536,
446
+ "eval_f1": 0.887366488204536,
447
+ "eval_loss": 0.41261956095695496,
448
+ "eval_precision": 0.887366488204536,
449
+ "eval_recall": 0.887366488204536,
450
+ "eval_runtime": 186.6798,
451
+ "eval_samples_per_second": 85.489,
452
+ "eval_steps_per_second": 5.346,
453
+ "step": 3400
454
+ },
455
+ {
456
+ "epoch": 0.19,
457
+ "learning_rate": 1.629047669603992e-05,
458
+ "loss": 0.3686,
459
+ "step": 3500
460
+ },
461
+ {
462
+ "epoch": 0.19,
463
+ "eval_accuracy": 0.8557442684292654,
464
+ "eval_f1": 0.8557442684292654,
465
+ "eval_loss": 0.5042837262153625,
466
+ "eval_precision": 0.8557442684292654,
467
+ "eval_recall": 0.8557442684292654,
468
+ "eval_runtime": 186.7579,
469
+ "eval_samples_per_second": 85.453,
470
+ "eval_steps_per_second": 5.344,
471
+ "step": 3500
472
+ },
473
+ {
474
+ "epoch": 0.19,
475
+ "eval_accuracy": 0.8617965055547454,
476
+ "eval_f1": 0.8617965055547454,
477
+ "eval_loss": 0.46751198172569275,
478
+ "eval_precision": 0.8617965055547454,
479
+ "eval_recall": 0.8617965055547454,
480
+ "eval_runtime": 198.6805,
481
+ "eval_samples_per_second": 80.325,
482
+ "eval_steps_per_second": 5.023,
483
+ "step": 3600
484
+ },
485
+ {
486
+ "epoch": 0.2,
487
+ "eval_accuracy": 0.8682514976831667,
488
+ "eval_f1": 0.8682514976831667,
489
+ "eval_loss": 0.4736296832561493,
490
+ "eval_precision": 0.8682514976831667,
491
+ "eval_recall": 0.8682514976831667,
492
+ "eval_runtime": 198.48,
493
+ "eval_samples_per_second": 80.406,
494
+ "eval_steps_per_second": 5.028,
495
+ "step": 3700
496
+ },
497
+ {
498
+ "epoch": 0.2,
499
+ "eval_accuracy": 0.8476574241337543,
500
+ "eval_f1": 0.8476574241337542,
501
+ "eval_loss": 0.5243175029754639,
502
+ "eval_precision": 0.8476574241337543,
503
+ "eval_recall": 0.8476574241337543,
504
+ "eval_runtime": 193.5062,
505
+ "eval_samples_per_second": 82.473,
506
+ "eval_steps_per_second": 5.157,
507
+ "step": 3800
508
+ },
509
+ {
510
+ "epoch": 0.21,
511
+ "eval_accuracy": 0.8786953515525412,
512
+ "eval_f1": 0.8786953515525412,
513
+ "eval_loss": 0.4317740201950073,
514
+ "eval_precision": 0.8786953515525412,
515
+ "eval_recall": 0.8786953515525412,
516
+ "eval_runtime": 191.9223,
517
+ "eval_samples_per_second": 83.153,
518
+ "eval_steps_per_second": 5.2,
519
+ "step": 3900
520
+ },
521
+ {
522
+ "epoch": 0.21,
523
+ "learning_rate": 1.575963478076229e-05,
524
+ "loss": 0.2712,
525
+ "step": 4000
526
+ },
527
+ {
528
+ "epoch": 0.21,
529
+ "eval_accuracy": 0.8783630290747447,
530
+ "eval_f1": 0.8783630290747447,
531
+ "eval_loss": 0.43737560510635376,
532
+ "eval_precision": 0.8783630290747447,
533
+ "eval_recall": 0.8783630290747447,
534
+ "eval_runtime": 190.6186,
535
+ "eval_samples_per_second": 83.722,
536
+ "eval_steps_per_second": 5.236,
537
+ "step": 4000
538
+ },
539
+ {
540
+ "epoch": 0.22,
541
+ "eval_accuracy": 0.8703069370085224,
542
+ "eval_f1": 0.8703069370085224,
543
+ "eval_loss": 0.5073068737983704,
544
+ "eval_precision": 0.8703069370085224,
545
+ "eval_recall": 0.8703069370085224,
546
+ "eval_runtime": 186.0675,
547
+ "eval_samples_per_second": 85.77,
548
+ "eval_steps_per_second": 5.364,
549
+ "step": 4100
550
+ },
551
+ {
552
+ "epoch": 0.22,
553
+ "eval_accuracy": 0.8908900984765742,
554
+ "eval_f1": 0.8908900984765742,
555
+ "eval_loss": 0.4120965301990509,
556
+ "eval_precision": 0.8908900984765742,
557
+ "eval_recall": 0.8908900984765742,
558
+ "eval_runtime": 178.3397,
559
+ "eval_samples_per_second": 89.487,
560
+ "eval_steps_per_second": 5.596,
561
+ "step": 4200
562
+ },
563
+ {
564
+ "epoch": 0.23,
565
+ "eval_accuracy": 0.8742471903870516,
566
+ "eval_f1": 0.8742471903870516,
567
+ "eval_loss": 0.481146901845932,
568
+ "eval_precision": 0.8742471903870516,
569
+ "eval_recall": 0.8742471903870516,
570
+ "eval_runtime": 179.3822,
571
+ "eval_samples_per_second": 88.966,
572
+ "eval_steps_per_second": 5.564,
573
+ "step": 4300
574
+ },
575
+ {
576
+ "epoch": 0.23,
577
+ "eval_accuracy": 0.8648419682617153,
578
+ "eval_f1": 0.8648419682617154,
579
+ "eval_loss": 0.5382417440414429,
580
+ "eval_precision": 0.8648419682617153,
581
+ "eval_recall": 0.8648419682617153,
582
+ "eval_runtime": 180.6189,
583
+ "eval_samples_per_second": 88.357,
584
+ "eval_steps_per_second": 5.525,
585
+ "step": 4400
586
+ },
587
+ {
588
+ "epoch": 0.24,
589
+ "learning_rate": 1.5228792865484661e-05,
590
+ "loss": 0.2641,
591
+ "step": 4500
592
+ },
593
+ {
594
+ "epoch": 0.24,
595
+ "eval_accuracy": 0.8675372523577536,
596
+ "eval_f1": 0.8675372523577536,
597
+ "eval_loss": 0.5521109104156494,
598
+ "eval_precision": 0.8675372523577536,
599
+ "eval_recall": 0.8675372523577536,
600
+ "eval_runtime": 178.45,
601
+ "eval_samples_per_second": 89.431,
602
+ "eval_steps_per_second": 5.593,
603
+ "step": 4500
604
+ },
605
+ {
606
+ "epoch": 0.24,
607
+ "eval_accuracy": 0.8621943005207047,
608
+ "eval_f1": 0.8621943005207047,
609
+ "eval_loss": 0.5831220149993896,
610
+ "eval_precision": 0.8621943005207047,
611
+ "eval_recall": 0.8621943005207047,
612
+ "eval_runtime": 258.7275,
613
+ "eval_samples_per_second": 61.683,
614
+ "eval_steps_per_second": 3.857,
615
+ "step": 4600
616
+ },
617
+ {
618
+ "epoch": 0.25,
619
+ "eval_accuracy": 0.871682851267339,
620
+ "eval_f1": 0.871682851267339,
621
+ "eval_loss": 0.46240246295928955,
622
+ "eval_precision": 0.871682851267339,
623
+ "eval_recall": 0.871682851267339,
624
+ "eval_runtime": 2013.7143,
625
+ "eval_samples_per_second": 7.925,
626
+ "eval_steps_per_second": 0.496,
627
+ "step": 4700
628
+ },
629
+ {
630
+ "epoch": 0.25,
631
+ "eval_accuracy": 0.8670442246817393,
632
+ "eval_f1": 0.8670442246817393,
633
+ "eval_loss": 0.5278752446174622,
634
+ "eval_precision": 0.8670442246817393,
635
+ "eval_recall": 0.8670442246817393,
636
+ "eval_runtime": 1506.7949,
637
+ "eval_samples_per_second": 10.591,
638
+ "eval_steps_per_second": 0.662,
639
+ "step": 4800
640
+ }
641
+ ],
642
+ "logging_steps": 500,
643
+ "max_steps": 18838,
644
+ "num_train_epochs": 1,
645
+ "save_steps": 100,
646
+ "total_flos": 1.06818139717632e+16,
647
+ "trial_name": null,
648
+ "trial_params": null
649
+ }
checkpoint-4800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b490d1b8547fb726aeded429a6044ce4a80f8def8ff6d44f9c25d0df4b1c0ff9
3
+ size 4792