diogopaes10 committed on
Commit
3fb26ef
1 Parent(s): cd6047c

End of training

Browse files
checkpoint-12000/config.json DELETED
@@ -1,50 +0,0 @@
1
- {
2
- "_name_or_path": "microsoft/MiniLM-L12-H384-uncased",
3
- "architectures": [
4
- "BertForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 384,
11
- "id2label": {
12
- "0": "Society & Culture",
13
- "1": "Science & Mathematics",
14
- "2": "Health",
15
- "3": "Education & Reference",
16
- "4": "Computers & Internet",
17
- "5": "Sports",
18
- "6": "Business & Finance",
19
- "7": "Entertainment & Music",
20
- "8": "Family & Relationships",
21
- "9": "Politics & Government"
22
- },
23
- "initializer_range": 0.02,
24
- "intermediate_size": 1536,
25
- "label2id": {
26
- "Business & Finance": 6,
27
- "Computers & Internet": 4,
28
- "Education & Reference": 3,
29
- "Entertainment & Music": 7,
30
- "Family & Relationships": 8,
31
- "Health": 2,
32
- "Politics & Government": 9,
33
- "Science & Mathematics": 1,
34
- "Society & Culture": 0,
35
- "Sports": 5
36
- },
37
- "layer_norm_eps": 1e-12,
38
- "max_position_embeddings": 512,
39
- "model_type": "bert",
40
- "num_attention_heads": 12,
41
- "num_hidden_layers": 12,
42
- "pad_token_id": 0,
43
- "position_embedding_type": "absolute",
44
- "problem_type": "single_label_classification",
45
- "torch_dtype": "float32",
46
- "transformers_version": "4.31.0",
47
- "type_vocab_size": 2,
48
- "use_cache": true,
49
- "vocab_size": 30522
50
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-12000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:14dbf8bfdaff0d358c50ba58b2e37e228971d18c40ded342ff3414bc23c55625
3
- size 267028677
 
 
 
 
checkpoint-12000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f464404bbabf4ffc076175dc0dd64bed9ebbae82e692e4cc0d024912d83b0bf7
3
- size 133523761
 
 
 
 
checkpoint-12000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e99597d92b26233312419b8d7606c8fbe7f7558947be013db496307a49ea89f
3
- size 14575
 
 
 
 
checkpoint-12000/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
checkpoint-12000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-12000/tokenizer_config.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "clean_up_tokenization_spaces": true,
3
- "cls_token": "[CLS]",
4
- "do_basic_tokenize": true,
5
- "do_lower_case": true,
6
- "mask_token": "[MASK]",
7
- "model_max_length": 1000000000000000019884624838656,
8
- "never_split": null,
9
- "pad_token": "[PAD]",
10
- "sep_token": "[SEP]",
11
- "strip_accents": null,
12
- "tokenize_chinese_chars": true,
13
- "tokenizer_class": "BertTokenizer",
14
- "unk_token": "[UNK]"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-12000/trainer_state.json DELETED
@@ -1,516 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 4.8,
5
- "global_step": 12000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 1.99984e-05,
13
- "loss": 2.3075,
14
- "step": 1
15
- },
16
- {
17
- "epoch": 0.25,
18
- "learning_rate": 1.90016e-05,
19
- "loss": 1.5316,
20
- "step": 625
21
- },
22
- {
23
- "epoch": 0.25,
24
- "eval_accuracy": 0.69285,
25
- "eval_disk_space_total": 78.1898422241211,
26
- "eval_disk_space_used": 25.717952728271484,
27
- "eval_f1": 0.6823748842925246,
28
- "eval_gpu_ram_allocated": 0.39347171783447266,
29
- "eval_gpu_ram_cached": 16.05078125,
30
- "eval_gpu_ram_total": 39.56402587890625,
31
- "eval_gpu_utilization": 33,
32
- "eval_loss": 1.1301642656326294,
33
- "eval_precision": 0.6859286816875455,
34
- "eval_recall": 0.69285,
35
- "eval_runtime": 15.5551,
36
- "eval_samples_per_second": 1285.751,
37
- "eval_steps_per_second": 40.18,
38
- "eval_system_ram_total": 83.48074722290039,
39
- "eval_system_ram_used": 4.108898162841797,
40
- "step": 625
41
- },
42
- {
43
- "epoch": 0.5,
44
- "learning_rate": 1.80016e-05,
45
- "loss": 1.0615,
46
- "step": 1250
47
- },
48
- {
49
- "epoch": 0.5,
50
- "eval_accuracy": 0.7049,
51
- "eval_disk_space_total": 78.1898422241211,
52
- "eval_disk_space_used": 26.091278076171875,
53
- "eval_f1": 0.7010538468880997,
54
- "eval_gpu_ram_allocated": 0.39355039596557617,
55
- "eval_gpu_ram_cached": 16.05078125,
56
- "eval_gpu_ram_total": 39.56402587890625,
57
- "eval_gpu_utilization": 33,
58
- "eval_loss": 1.002210259437561,
59
- "eval_precision": 0.7064795769852438,
60
- "eval_recall": 0.7049,
61
- "eval_runtime": 15.2502,
62
- "eval_samples_per_second": 1311.462,
63
- "eval_steps_per_second": 40.983,
64
- "eval_system_ram_total": 83.48074722290039,
65
- "eval_system_ram_used": 3.858478546142578,
66
- "step": 1250
67
- },
68
- {
69
- "epoch": 0.75,
70
- "learning_rate": 1.70016e-05,
71
- "loss": 0.9804,
72
- "step": 1875
73
- },
74
- {
75
- "epoch": 0.75,
76
- "eval_accuracy": 0.71915,
77
- "eval_disk_space_total": 78.1898422241211,
78
- "eval_disk_space_used": 26.464637756347656,
79
- "eval_f1": 0.7157918638422971,
80
- "eval_gpu_ram_allocated": 0.39350461959838867,
81
- "eval_gpu_ram_cached": 16.05078125,
82
- "eval_gpu_ram_total": 39.56402587890625,
83
- "eval_gpu_utilization": 33,
84
- "eval_loss": 0.9258390665054321,
85
- "eval_precision": 0.7200804939824763,
86
- "eval_recall": 0.71915,
87
- "eval_runtime": 15.2672,
88
- "eval_samples_per_second": 1310.0,
89
- "eval_steps_per_second": 40.937,
90
- "eval_system_ram_total": 83.48074722290039,
91
- "eval_system_ram_used": 3.863994598388672,
92
- "step": 1875
93
- },
94
- {
95
- "epoch": 1.0,
96
- "learning_rate": 1.6001600000000003e-05,
97
- "loss": 0.9244,
98
- "step": 2500
99
- },
100
- {
101
- "epoch": 1.0,
102
- "eval_accuracy": 0.7286,
103
- "eval_disk_space_total": 78.1898422241211,
104
- "eval_disk_space_used": 26.464874267578125,
105
- "eval_f1": 0.721912767172515,
106
- "eval_gpu_ram_allocated": 0.39352893829345703,
107
- "eval_gpu_ram_cached": 16.05078125,
108
- "eval_gpu_ram_total": 39.56402587890625,
109
- "eval_gpu_utilization": 32,
110
- "eval_loss": 0.8794927000999451,
111
- "eval_precision": 0.7265669055087627,
112
- "eval_recall": 0.7286,
113
- "eval_runtime": 15.2593,
114
- "eval_samples_per_second": 1310.678,
115
- "eval_steps_per_second": 40.959,
116
- "eval_system_ram_total": 83.48074722290039,
117
- "eval_system_ram_used": 3.881519317626953,
118
- "step": 2500
119
- },
120
- {
121
- "epoch": 1.25,
122
- "learning_rate": 1.50016e-05,
123
- "loss": 0.8471,
124
- "step": 3125
125
- },
126
- {
127
- "epoch": 1.25,
128
- "eval_accuracy": 0.73045,
129
- "eval_disk_space_total": 78.1898422241211,
130
- "eval_disk_space_used": 26.465320587158203,
131
- "eval_f1": 0.7243049440325827,
132
- "eval_gpu_ram_allocated": 0.39346885681152344,
133
- "eval_gpu_ram_cached": 16.05078125,
134
- "eval_gpu_ram_total": 39.56402587890625,
135
- "eval_gpu_utilization": 31,
136
- "eval_loss": 0.8885928988456726,
137
- "eval_precision": 0.7280280015075138,
138
- "eval_recall": 0.73045,
139
- "eval_runtime": 15.2207,
140
- "eval_samples_per_second": 1313.996,
141
- "eval_steps_per_second": 41.062,
142
- "eval_system_ram_total": 83.48074722290039,
143
- "eval_system_ram_used": 4.031795501708984,
144
- "step": 3125
145
- },
146
- {
147
- "epoch": 1.5,
148
- "learning_rate": 1.4001600000000002e-05,
149
- "loss": 0.8294,
150
- "step": 3750
151
- },
152
- {
153
- "epoch": 1.5,
154
- "eval_accuracy": 0.7303,
155
- "eval_disk_space_total": 78.1898422241211,
156
- "eval_disk_space_used": 26.465579986572266,
157
- "eval_f1": 0.7285125149375191,
158
- "eval_gpu_ram_allocated": 0.39354896545410156,
159
- "eval_gpu_ram_cached": 16.05078125,
160
- "eval_gpu_ram_total": 39.56402587890625,
161
- "eval_gpu_utilization": 33,
162
- "eval_loss": 0.8648403286933899,
163
- "eval_precision": 0.7304270054757978,
164
- "eval_recall": 0.7303,
165
- "eval_runtime": 15.1251,
166
- "eval_samples_per_second": 1322.302,
167
- "eval_steps_per_second": 41.322,
168
- "eval_system_ram_total": 83.48074722290039,
169
- "eval_system_ram_used": 3.822803497314453,
170
- "step": 3750
171
- },
172
- {
173
- "epoch": 1.75,
174
- "learning_rate": 1.30032e-05,
175
- "loss": 0.8229,
176
- "step": 4375
177
- },
178
- {
179
- "epoch": 1.75,
180
- "eval_accuracy": 0.7347,
181
- "eval_disk_space_total": 78.1898422241211,
182
- "eval_disk_space_used": 26.4658203125,
183
- "eval_f1": 0.7306008054961884,
184
- "eval_gpu_ram_allocated": 0.3934760093688965,
185
- "eval_gpu_ram_cached": 16.05078125,
186
- "eval_gpu_ram_total": 39.56402587890625,
187
- "eval_gpu_utilization": 32,
188
- "eval_loss": 0.8477036952972412,
189
- "eval_precision": 0.7313531828325577,
190
- "eval_recall": 0.7347,
191
- "eval_runtime": 15.2385,
192
- "eval_samples_per_second": 1312.463,
193
- "eval_steps_per_second": 41.014,
194
- "eval_system_ram_total": 83.48074722290039,
195
- "eval_system_ram_used": 3.870433807373047,
196
- "step": 4375
197
- },
198
- {
199
- "epoch": 2.0,
200
- "learning_rate": 1.2003200000000002e-05,
201
- "loss": 0.8227,
202
- "step": 5000
203
- },
204
- {
205
- "epoch": 2.0,
206
- "eval_accuracy": 0.7321,
207
- "eval_disk_space_total": 78.1898422241211,
208
- "eval_disk_space_used": 26.466053009033203,
209
- "eval_f1": 0.729989989450906,
210
- "eval_gpu_ram_allocated": 0.3935103416442871,
211
- "eval_gpu_ram_cached": 16.05078125,
212
- "eval_gpu_ram_total": 39.56402587890625,
213
- "eval_gpu_utilization": 34,
214
- "eval_loss": 0.8513504862785339,
215
- "eval_precision": 0.7342944597310201,
216
- "eval_recall": 0.7321,
217
- "eval_runtime": 15.2256,
218
- "eval_samples_per_second": 1313.581,
219
- "eval_steps_per_second": 41.049,
220
- "eval_system_ram_total": 83.48074722290039,
221
- "eval_system_ram_used": 3.8656463623046875,
222
- "step": 5000
223
- },
224
- {
225
- "epoch": 2.25,
226
- "learning_rate": 1.10048e-05,
227
- "loss": 0.7515,
228
- "step": 5625
229
- },
230
- {
231
- "epoch": 2.25,
232
- "eval_accuracy": 0.73265,
233
- "eval_disk_space_total": 78.1898422241211,
234
- "eval_disk_space_used": 26.46636199951172,
235
- "eval_f1": 0.7286075365290565,
236
- "eval_gpu_ram_allocated": 0.3934974670410156,
237
- "eval_gpu_ram_cached": 16.05078125,
238
- "eval_gpu_ram_total": 39.56402587890625,
239
- "eval_gpu_utilization": 32,
240
- "eval_loss": 0.857990026473999,
241
- "eval_precision": 0.7324340392002969,
242
- "eval_recall": 0.73265,
243
- "eval_runtime": 15.6042,
244
- "eval_samples_per_second": 1281.705,
245
- "eval_steps_per_second": 40.053,
246
- "eval_system_ram_total": 83.48074722290039,
247
- "eval_system_ram_used": 4.057643890380859,
248
- "step": 5625
249
- },
250
- {
251
- "epoch": 2.5,
252
- "learning_rate": 1.0004800000000001e-05,
253
- "loss": 0.7523,
254
- "step": 6250
255
- },
256
- {
257
- "epoch": 2.5,
258
- "eval_accuracy": 0.734,
259
- "eval_disk_space_total": 78.1898422241211,
260
- "eval_disk_space_used": 26.46658706665039,
261
- "eval_f1": 0.7295923418650617,
262
- "eval_gpu_ram_allocated": 0.39351463317871094,
263
- "eval_gpu_ram_cached": 16.05078125,
264
- "eval_gpu_ram_total": 39.56402587890625,
265
- "eval_gpu_utilization": 32,
266
- "eval_loss": 0.8498074412345886,
267
- "eval_precision": 0.7313935171526833,
268
- "eval_recall": 0.734,
269
- "eval_runtime": 15.7189,
270
- "eval_samples_per_second": 1272.356,
271
- "eval_steps_per_second": 39.761,
272
- "eval_system_ram_total": 83.48074722290039,
273
- "eval_system_ram_used": 3.8655662536621094,
274
- "step": 6250
275
- },
276
- {
277
- "epoch": 2.75,
278
- "learning_rate": 9.0048e-06,
279
- "loss": 0.7396,
280
- "step": 6875
281
- },
282
- {
283
- "epoch": 2.75,
284
- "eval_accuracy": 0.73645,
285
- "eval_disk_space_total": 78.1898422241211,
286
- "eval_disk_space_used": 26.466869354248047,
287
- "eval_f1": 0.7326318327243708,
288
- "eval_gpu_ram_allocated": 0.3934817314147949,
289
- "eval_gpu_ram_cached": 16.05078125,
290
- "eval_gpu_ram_total": 39.56402587890625,
291
- "eval_gpu_utilization": 33,
292
- "eval_loss": 0.8402908444404602,
293
- "eval_precision": 0.7323211491997443,
294
- "eval_recall": 0.73645,
295
- "eval_runtime": 15.1367,
296
- "eval_samples_per_second": 1321.295,
297
- "eval_steps_per_second": 41.29,
298
- "eval_system_ram_total": 83.48074722290039,
299
- "eval_system_ram_used": 3.8685684204101562,
300
- "step": 6875
301
- },
302
- {
303
- "epoch": 3.0,
304
- "learning_rate": 8.0048e-06,
305
- "loss": 0.7308,
306
- "step": 7500
307
- },
308
- {
309
- "epoch": 3.0,
310
- "eval_accuracy": 0.73775,
311
- "eval_disk_space_total": 78.1898422241211,
312
- "eval_disk_space_used": 26.467105865478516,
313
- "eval_f1": 0.7347713515400808,
314
- "eval_gpu_ram_allocated": 0.3934803009033203,
315
- "eval_gpu_ram_cached": 16.05078125,
316
- "eval_gpu_ram_total": 39.56402587890625,
317
- "eval_gpu_utilization": 26,
318
- "eval_loss": 0.8414311408996582,
319
- "eval_precision": 0.7338961618089487,
320
- "eval_recall": 0.73775,
321
- "eval_runtime": 15.1121,
322
- "eval_samples_per_second": 1323.444,
323
- "eval_steps_per_second": 41.358,
324
- "eval_system_ram_total": 83.48074722290039,
325
- "eval_system_ram_used": 3.8610610961914062,
326
- "step": 7500
327
- },
328
- {
329
- "epoch": 3.25,
330
- "learning_rate": 7.0048e-06,
331
- "loss": 0.6929,
332
- "step": 8125
333
- },
334
- {
335
- "epoch": 3.25,
336
- "eval_accuracy": 0.73505,
337
- "eval_disk_space_total": 78.1898422241211,
338
- "eval_disk_space_used": 26.468040466308594,
339
- "eval_f1": 0.7321519443896675,
340
- "eval_gpu_ram_allocated": 0.39360761642456055,
341
- "eval_gpu_ram_cached": 16.05078125,
342
- "eval_gpu_ram_total": 39.56402587890625,
343
- "eval_gpu_utilization": 29,
344
- "eval_loss": 0.855096697807312,
345
- "eval_precision": 0.7376195934146232,
346
- "eval_recall": 0.73505,
347
- "eval_runtime": 15.2462,
348
- "eval_samples_per_second": 1311.799,
349
- "eval_steps_per_second": 40.994,
350
- "eval_system_ram_total": 83.48074722290039,
351
- "eval_system_ram_used": 4.056495666503906,
352
- "step": 8125
353
- },
354
- {
355
- "epoch": 3.5,
356
- "learning_rate": 6.0048000000000005e-06,
357
- "loss": 0.6772,
358
- "step": 8750
359
- },
360
- {
361
- "epoch": 3.5,
362
- "eval_accuracy": 0.738,
363
- "eval_disk_space_total": 78.1898422241211,
364
- "eval_disk_space_used": 26.468387603759766,
365
- "eval_f1": 0.7334835535994888,
366
- "eval_gpu_ram_allocated": 0.3935232162475586,
367
- "eval_gpu_ram_cached": 16.05078125,
368
- "eval_gpu_ram_total": 39.56402587890625,
369
- "eval_gpu_utilization": 31,
370
- "eval_loss": 0.84714674949646,
371
- "eval_precision": 0.7326757628965238,
372
- "eval_recall": 0.738,
373
- "eval_runtime": 15.3571,
374
- "eval_samples_per_second": 1302.33,
375
- "eval_steps_per_second": 40.698,
376
- "eval_system_ram_total": 83.48074722290039,
377
- "eval_system_ram_used": 3.835094451904297,
378
- "step": 8750
379
- },
380
- {
381
- "epoch": 3.75,
382
- "learning_rate": 5.0064e-06,
383
- "loss": 0.682,
384
- "step": 9375
385
- },
386
- {
387
- "epoch": 3.75,
388
- "eval_accuracy": 0.735,
389
- "eval_disk_space_total": 78.1898422241211,
390
- "eval_disk_space_used": 26.468624114990234,
391
- "eval_f1": 0.731102054402859,
392
- "eval_gpu_ram_allocated": 0.3934817314147949,
393
- "eval_gpu_ram_cached": 16.05078125,
394
- "eval_gpu_ram_total": 39.56402587890625,
395
- "eval_gpu_utilization": 34,
396
- "eval_loss": 0.8460220694541931,
397
- "eval_precision": 0.7310632962275837,
398
- "eval_recall": 0.735,
399
- "eval_runtime": 15.2336,
400
- "eval_samples_per_second": 1312.891,
401
- "eval_steps_per_second": 41.028,
402
- "eval_system_ram_total": 83.48074722290039,
403
- "eval_system_ram_used": 3.8781509399414062,
404
- "step": 9375
405
- },
406
- {
407
- "epoch": 4.0,
408
- "learning_rate": 4.0064e-06,
409
- "loss": 0.6741,
410
- "step": 10000
411
- },
412
- {
413
- "epoch": 4.0,
414
- "eval_accuracy": 0.7376,
415
- "eval_disk_space_total": 78.1898422241211,
416
- "eval_disk_space_used": 26.46900177001953,
417
- "eval_f1": 0.7334789109935773,
418
- "eval_gpu_ram_allocated": 0.393521785736084,
419
- "eval_gpu_ram_cached": 16.05078125,
420
- "eval_gpu_ram_total": 39.56402587890625,
421
- "eval_gpu_utilization": 31,
422
- "eval_loss": 0.8409038782119751,
423
- "eval_precision": 0.7329890286601207,
424
- "eval_recall": 0.7376,
425
- "eval_runtime": 15.1968,
426
- "eval_samples_per_second": 1316.067,
427
- "eval_steps_per_second": 41.127,
428
- "eval_system_ram_total": 83.48074722290039,
429
- "eval_system_ram_used": 3.8847808837890625,
430
- "step": 10000
431
- },
432
- {
433
- "epoch": 4.25,
434
- "learning_rate": 3.0064000000000006e-06,
435
- "loss": 0.6247,
436
- "step": 10625
437
- },
438
- {
439
- "epoch": 4.25,
440
- "eval_accuracy": 0.736,
441
- "eval_disk_space_total": 78.1898422241211,
442
- "eval_disk_space_used": 26.469379425048828,
443
- "eval_f1": 0.7332127747065252,
444
- "eval_gpu_ram_allocated": 0.39348888397216797,
445
- "eval_gpu_ram_cached": 16.05078125,
446
- "eval_gpu_ram_total": 39.56402587890625,
447
- "eval_gpu_utilization": 32,
448
- "eval_loss": 0.8499526381492615,
449
- "eval_precision": 0.7324176547179257,
450
- "eval_recall": 0.736,
451
- "eval_runtime": 15.3802,
452
- "eval_samples_per_second": 1300.37,
453
- "eval_steps_per_second": 40.637,
454
- "eval_system_ram_total": 83.48074722290039,
455
- "eval_system_ram_used": 4.0838470458984375,
456
- "step": 10625
457
- },
458
- {
459
- "epoch": 4.5,
460
- "learning_rate": 2.0064000000000004e-06,
461
- "loss": 0.6446,
462
- "step": 11250
463
- },
464
- {
465
- "epoch": 4.5,
466
- "eval_accuracy": 0.7358,
467
- "eval_disk_space_total": 78.1898422241211,
468
- "eval_disk_space_used": 26.469707489013672,
469
- "eval_f1": 0.7322672918855475,
470
- "eval_gpu_ram_allocated": 0.39359617233276367,
471
- "eval_gpu_ram_cached": 16.05078125,
472
- "eval_gpu_ram_total": 39.56402587890625,
473
- "eval_gpu_utilization": 31,
474
- "eval_loss": 0.8464106321334839,
475
- "eval_precision": 0.7319786149192336,
476
- "eval_recall": 0.7358,
477
- "eval_runtime": 15.2922,
478
- "eval_samples_per_second": 1307.853,
479
- "eval_steps_per_second": 40.87,
480
- "eval_system_ram_total": 83.48074722290039,
481
- "eval_system_ram_used": 3.8687171936035156,
482
- "step": 11250
483
- },
484
- {
485
- "epoch": 4.75,
486
- "learning_rate": 1.0064e-06,
487
- "loss": 0.6355,
488
- "step": 11875
489
- },
490
- {
491
- "epoch": 4.75,
492
- "eval_accuracy": 0.73495,
493
- "eval_disk_space_total": 78.1898422241211,
494
- "eval_disk_space_used": 26.470008850097656,
495
- "eval_f1": 0.7311394674076037,
496
- "eval_gpu_ram_allocated": 0.39347314834594727,
497
- "eval_gpu_ram_cached": 16.05078125,
498
- "eval_gpu_ram_total": 39.56402587890625,
499
- "eval_gpu_utilization": 30,
500
- "eval_loss": 0.850346565246582,
501
- "eval_precision": 0.7308376671248696,
502
- "eval_recall": 0.73495,
503
- "eval_runtime": 15.2775,
504
- "eval_samples_per_second": 1309.111,
505
- "eval_steps_per_second": 40.91,
506
- "eval_system_ram_total": 83.48074722290039,
507
- "eval_system_ram_used": 3.8853225708007812,
508
- "step": 11875
509
- }
510
- ],
511
- "max_steps": 12500,
512
- "num_train_epochs": 5,
513
- "total_flos": 6101292142348032.0,
514
- "trial_name": null,
515
- "trial_params": null
516
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-12000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4856ce4bc7a3b9a50464d3e12d6b58a0fcddfe9bfd910688b04cf70984b818d
3
- size 4091
 
 
 
 
checkpoint-12000/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
runs/Jul23_01-08-06_df8adf32a670/events.out.tfevents.1690074496.df8adf32a670.1420.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dbc2d39c4c64c3b5480bd01b1771b4f4940fe7f8922458f8cf510bf6c94e960
3
- size 26922
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f5795cbe79279682da91bd2446f596fe412ee2f87bfe2a94f7a1c73864d5d49
3
+ size 27276
checkpoint-12000/scheduler.pt → runs/Jul23_01-08-06_df8adf32a670/events.out.tfevents.1690076024.df8adf32a670.1420.1 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee8b6ab7544af193bb6ba46a2bee0e0abe8937e6b54f5dea5a487ee681ff4d17
3
- size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aab6c0387b1a739cbdb3497d13102c37de6cdfc940c4b086d3445e1682a3551
3
+ size 1033