w11wo committed on
Commit
74f1b5e
1 Parent(s): 1b46f5f

End of training

Browse files
README.md CHANGED
@@ -20,11 +20,11 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [xlm-roberta-large](https://huggingface.co/xlm-roberta-large) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.8974
24
- - Accuracy: 0.8918
25
- - F1: 0.7387
26
- - Precision: 0.9318
27
- - Recall: 0.6119
28
 
29
  ## Model description
30
 
 
20
 
21
  This model is a fine-tuned version of [xlm-roberta-large](https://huggingface.co/xlm-roberta-large) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.4322
24
+ - Accuracy: 0.8885
25
+ - F1: 0.7692
26
+ - Precision: 0.7937
27
+ - Recall: 0.7463
28
 
29
  ## Model description
30
 
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.8884758364312267,
4
+ "eval_f1": 0.7692307692307693,
5
+ "eval_loss": 0.432170569896698,
6
+ "eval_precision": 0.7936507936507936,
7
+ "eval_recall": 0.746268656716418,
8
+ "eval_runtime": 6.7794,
9
+ "eval_samples": 268,
10
+ "eval_samples_per_second": 79.358,
11
+ "eval_steps_per_second": 1.328,
12
+ "train_loss": 0.21473971275286485,
13
+ "train_runtime": 1465.663,
14
+ "train_samples": 1878,
15
+ "train_samples_per_second": 128.133,
16
+ "train_steps_per_second": 4.025
17
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.8884758364312267,
4
+ "eval_f1": 0.7692307692307693,
5
+ "eval_loss": 0.432170569896698,
6
+ "eval_precision": 0.7936507936507936,
7
+ "eval_recall": 0.746268656716418,
8
+ "eval_runtime": 6.7794,
9
+ "eval_samples": 268,
10
+ "eval_samples_per_second": 79.358,
11
+ "eval_steps_per_second": 1.328
12
+ }
predict_results.txt ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index prediction
2
+ 0 1
3
+ 1 0
4
+ 2 0
5
+ 3 0
6
+ 4 1
7
+ 5 1
8
+ 6 1
9
+ 7 1
10
+ 8 0
11
+ 9 0
12
+ 10 0
13
+ 11 0
14
+ 12 1
15
+ 13 0
16
+ 14 0
17
+ 15 1
18
+ 16 1
19
+ 17 1
20
+ 18 0
21
+ 19 0
22
+ 20 0
23
+ 21 0
24
+ 22 0
25
+ 23 1
26
+ 24 0
27
+ 25 0
28
+ 26 0
29
+ 27 1
30
+ 28 1
31
+ 29 0
32
+ 30 0
33
+ 31 0
34
+ 32 0
35
+ 33 1
36
+ 34 0
37
+ 35 0
38
+ 36 0
39
+ 37 0
40
+ 38 0
41
+ 39 0
42
+ 40 0
43
+ 41 0
44
+ 42 0
45
+ 43 0
46
+ 44 1
47
+ 45 0
48
+ 46 0
49
+ 47 0
50
+ 48 0
51
+ 49 0
52
+ 50 0
53
+ 51 0
54
+ 52 0
55
+ 53 0
56
+ 54 0
57
+ 55 0
58
+ 56 0
59
+ 57 0
60
+ 58 0
61
+ 59 0
62
+ 60 0
63
+ 61 1
64
+ 62 0
65
+ 63 1
66
+ 64 0
67
+ 65 0
68
+ 66 0
69
+ 67 0
70
+ 68 0
71
+ 69 0
72
+ 70 1
73
+ 71 1
74
+ 72 0
75
+ 73 1
76
+ 74 0
77
+ 75 0
78
+ 76 0
79
+ 77 0
80
+ 78 0
81
+ 79 0
82
+ 80 0
83
+ 81 0
84
+ 82 0
85
+ 83 1
86
+ 84 1
87
+ 85 0
88
+ 86 1
89
+ 87 0
90
+ 88 0
91
+ 89 0
92
+ 90 0
93
+ 91 0
94
+ 92 1
95
+ 93 0
96
+ 94 0
97
+ 95 0
98
+ 96 1
99
+ 97 0
100
+ 98 0
101
+ 99 0
102
+ 100 0
103
+ 101 0
104
+ 102 0
105
+ 103 0
106
+ 104 0
107
+ 105 1
108
+ 106 0
109
+ 107 0
110
+ 108 1
111
+ 109 0
112
+ 110 0
113
+ 111 1
114
+ 112 0
115
+ 113 0
116
+ 114 0
117
+ 115 0
118
+ 116 0
119
+ 117 0
120
+ 118 0
121
+ 119 1
122
+ 120 0
123
+ 121 0
124
+ 122 0
125
+ 123 0
126
+ 124 0
127
+ 125 0
128
+ 126 0
129
+ 127 0
130
+ 128 0
131
+ 129 0
132
+ 130 1
133
+ 131 0
134
+ 132 0
135
+ 133 0
136
+ 134 0
137
+ 135 0
138
+ 136 1
139
+ 137 0
140
+ 138 0
141
+ 139 0
142
+ 140 1
143
+ 141 0
144
+ 142 1
145
+ 143 0
146
+ 144 1
147
+ 145 0
148
+ 146 0
149
+ 147 1
150
+ 148 1
151
+ 149 1
152
+ 150 1
153
+ 151 0
154
+ 152 0
155
+ 153 0
156
+ 154 0
157
+ 155 0
158
+ 156 0
159
+ 157 0
160
+ 158 1
161
+ 159 0
162
+ 160 0
163
+ 161 0
164
+ 162 0
165
+ 163 0
166
+ 164 0
167
+ 165 0
168
+ 166 0
169
+ 167 0
170
+ 168 0
171
+ 169 0
172
+ 170 0
173
+ 171 1
174
+ 172 0
175
+ 173 0
176
+ 174 1
177
+ 175 0
178
+ 176 0
179
+ 177 0
180
+ 178 0
181
+ 179 1
182
+ 180 1
183
+ 181 0
184
+ 182 0
185
+ 183 0
186
+ 184 0
187
+ 185 1
188
+ 186 0
189
+ 187 1
190
+ 188 0
191
+ 189 0
192
+ 190 0
193
+ 191 0
194
+ 192 1
195
+ 193 0
196
+ 194 0
197
+ 195 1
198
+ 196 0
199
+ 197 0
200
+ 198 1
201
+ 199 0
202
+ 200 0
203
+ 201 0
204
+ 202 1
205
+ 203 1
206
+ 204 0
207
+ 205 1
208
+ 206 0
209
+ 207 0
210
+ 208 0
211
+ 209 0
212
+ 210 1
213
+ 211 0
214
+ 212 1
215
+ 213 0
216
+ 214 0
217
+ 215 0
218
+ 216 1
219
+ 217 0
220
+ 218 1
221
+ 219 0
222
+ 220 0
223
+ 221 0
224
+ 222 0
225
+ 223 1
226
+ 224 0
227
+ 225 0
228
+ 226 1
229
+ 227 0
230
+ 228 1
231
+ 229 0
232
+ 230 0
233
+ 231 1
234
+ 232 1
235
+ 233 0
236
+ 234 1
237
+ 235 0
238
+ 236 0
239
+ 237 1
240
+ 238 0
241
+ 239 0
242
+ 240 0
243
+ 241 0
244
+ 242 0
245
+ 243 0
246
+ 244 0
247
+ 245 0
248
+ 246 0
249
+ 247 0
250
+ 248 0
251
+ 249 1
252
+ 250 0
253
+ 251 0
254
+ 252 1
255
+ 253 0
256
+ 254 1
257
+ 255 0
258
+ 256 0
259
+ 257 0
260
+ 258 0
261
+ 259 1
262
+ 260 1
263
+ 261 0
264
+ 262 0
265
+ 263 0
266
+ 264 0
267
+ 265 0
268
+ 266 0
269
+ 267 0
270
+ 268 1
271
+ 269 0
272
+ 270 0
273
+ 271 1
274
+ 272 0
275
+ 273 0
276
+ 274 0
277
+ 275 0
278
+ 276 0
279
+ 277 0
280
+ 278 1
281
+ 279 0
282
+ 280 0
283
+ 281 1
284
+ 282 0
285
+ 283 0
286
+ 284 0
287
+ 285 0
288
+ 286 0
289
+ 287 0
290
+ 288 0
291
+ 289 0
292
+ 290 0
293
+ 291 0
294
+ 292 0
295
+ 293 0
296
+ 294 1
297
+ 295 0
298
+ 296 0
299
+ 297 0
300
+ 298 0
301
+ 299 1
302
+ 300 0
303
+ 301 0
304
+ 302 1
305
+ 303 0
306
+ 304 1
307
+ 305 0
308
+ 306 1
309
+ 307 0
310
+ 308 0
311
+ 309 0
312
+ 310 0
313
+ 311 0
314
+ 312 0
315
+ 313 0
316
+ 314 0
317
+ 315 0
318
+ 316 0
319
+ 317 0
320
+ 318 0
321
+ 319 0
322
+ 320 1
323
+ 321 0
324
+ 322 0
325
+ 323 0
326
+ 324 0
327
+ 325 1
328
+ 326 1
329
+ 327 1
330
+ 328 0
331
+ 329 0
332
+ 330 0
333
+ 331 0
334
+ 332 0
335
+ 333 0
336
+ 334 0
337
+ 335 0
338
+ 336 0
339
+ 337 0
340
+ 338 0
341
+ 339 0
342
+ 340 1
343
+ 341 1
344
+ 342 0
345
+ 343 0
346
+ 344 0
347
+ 345 0
348
+ 346 1
349
+ 347 0
350
+ 348 0
351
+ 349 0
352
+ 350 0
353
+ 351 0
354
+ 352 0
355
+ 353 1
356
+ 354 0
357
+ 355 0
358
+ 356 0
359
+ 357 0
360
+ 358 0
361
+ 359 1
362
+ 360 1
363
+ 361 0
364
+ 362 0
365
+ 363 0
366
+ 364 0
367
+ 365 0
368
+ 366 1
369
+ 367 1
370
+ 368 1
371
+ 369 0
372
+ 370 0
373
+ 371 1
374
+ 372 0
375
+ 373 1
376
+ 374 0
377
+ 375 0
378
+ 376 0
379
+ 377 0
380
+ 378 1
381
+ 379 0
382
+ 380 1
383
+ 381 0
384
+ 382 0
385
+ 383 0
386
+ 384 1
387
+ 385 0
388
+ 386 0
389
+ 387 1
390
+ 388 1
391
+ 389 0
392
+ 390 1
393
+ 391 0
394
+ 392 0
395
+ 393 0
396
+ 394 1
397
+ 395 0
398
+ 396 0
399
+ 397 0
400
+ 398 0
401
+ 399 0
402
+ 400 1
403
+ 401 0
404
+ 402 0
405
+ 403 1
406
+ 404 0
407
+ 405 1
408
+ 406 0
409
+ 407 0
410
+ 408 0
411
+ 409 0
412
+ 410 0
413
+ 411 0
414
+ 412 1
415
+ 413 0
416
+ 414 0
417
+ 415 0
418
+ 416 0
419
+ 417 1
420
+ 418 0
421
+ 419 0
422
+ 420 1
423
+ 421 0
424
+ 422 1
425
+ 423 1
426
+ 424 0
427
+ 425 0
428
+ 426 1
429
+ 427 0
430
+ 428 0
431
+ 429 0
432
+ 430 0
433
+ 431 0
434
+ 432 0
435
+ 433 1
436
+ 434 0
437
+ 435 0
438
+ 436 0
439
+ 437 0
440
+ 438 0
441
+ 439 0
442
+ 440 0
443
+ 441 1
444
+ 442 0
445
+ 443 0
446
+ 444 0
447
+ 445 0
448
+ 446 0
449
+ 447 0
450
+ 448 0
451
+ 449 1
452
+ 450 0
453
+ 451 1
454
+ 452 0
455
+ 453 0
456
+ 454 0
457
+ 455 0
458
+ 456 0
459
+ 457 0
460
+ 458 0
461
+ 459 0
462
+ 460 0
463
+ 461 0
464
+ 462 0
465
+ 463 0
466
+ 464 1
467
+ 465 0
468
+ 466 1
469
+ 467 0
470
+ 468 0
471
+ 469 0
472
+ 470 0
473
+ 471 1
474
+ 472 0
475
+ 473 0
476
+ 474 0
477
+ 475 1
478
+ 476 0
479
+ 477 0
480
+ 478 0
481
+ 479 0
482
+ 480 1
483
+ 481 1
484
+ 482 0
485
+ 483 0
486
+ 484 0
487
+ 485 0
488
+ 486 0
489
+ 487 1
490
+ 488 0
491
+ 489 0
492
+ 490 0
493
+ 491 0
494
+ 492 0
495
+ 493 1
496
+ 494 1
497
+ 495 1
498
+ 496 1
499
+ 497 0
500
+ 498 0
501
+ 499 0
502
+ 500 0
503
+ 501 0
504
+ 502 0
505
+ 503 0
506
+ 504 0
507
+ 505 0
508
+ 506 0
509
+ 507 0
510
+ 508 0
511
+ 509 0
512
+ 510 0
513
+ 511 0
514
+ 512 0
515
+ 513 0
516
+ 514 0
517
+ 515 0
518
+ 516 0
519
+ 517 1
520
+ 518 0
521
+ 519 0
522
+ 520 0
523
+ 521 0
524
+ 522 0
525
+ 523 0
526
+ 524 0
527
+ 525 0
528
+ 526 0
529
+ 527 0
530
+ 528 0
531
+ 529 1
532
+ 530 0
533
+ 531 0
534
+ 532 0
535
+ 533 0
536
+ 534 0
537
+ 535 1
538
+ 536 1
539
+ 537 1
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "train_loss": 0.21473971275286485,
4
+ "train_runtime": 1465.663,
5
+ "train_samples": 1878,
6
+ "train_samples_per_second": 128.133,
7
+ "train_steps_per_second": 4.025
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8181818181818182,
3
+ "best_model_checkpoint": "outputs/xlm-roberta-large-twitter-indonesia-sarcastic/checkpoint-708",
4
+ "epoch": 15.0,
5
+ "eval_steps": 500,
6
+ "global_step": 885,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "learning_rate": 9.997697221833061e-06,
14
+ "loss": 0.5862,
15
+ "step": 59
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_accuracy": 0.75,
20
+ "eval_f1": 0.0,
21
+ "eval_loss": 0.5304240584373474,
22
+ "eval_precision": 0.0,
23
+ "eval_recall": 0.0,
24
+ "eval_runtime": 3.3745,
25
+ "eval_samples_per_second": 79.42,
26
+ "eval_steps_per_second": 1.482,
27
+ "step": 59
28
+ },
29
+ {
30
+ "epoch": 2.0,
31
+ "learning_rate": 9.990951812568578e-06,
32
+ "loss": 0.5168,
33
+ "step": 118
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_accuracy": 0.75,
38
+ "eval_f1": 0.0,
39
+ "eval_loss": 0.4897027611732483,
40
+ "eval_precision": 0.0,
41
+ "eval_recall": 0.0,
42
+ "eval_runtime": 3.3714,
43
+ "eval_samples_per_second": 79.491,
44
+ "eval_steps_per_second": 1.483,
45
+ "step": 118
46
+ },
47
+ {
48
+ "epoch": 3.0,
49
+ "learning_rate": 9.979044933876465e-06,
50
+ "loss": 0.4771,
51
+ "step": 177
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.7947761194029851,
56
+ "eval_f1": 0.3373493975903615,
57
+ "eval_loss": 0.45346158742904663,
58
+ "eval_precision": 0.875,
59
+ "eval_recall": 0.208955223880597,
60
+ "eval_runtime": 3.3741,
61
+ "eval_samples_per_second": 79.428,
62
+ "eval_steps_per_second": 1.482,
63
+ "step": 177
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "learning_rate": 9.962224338961997e-06,
68
+ "loss": 0.4101,
69
+ "step": 236
70
+ },
71
+ {
72
+ "epoch": 4.0,
73
+ "eval_accuracy": 0.7910447761194029,
74
+ "eval_f1": 0.6585365853658537,
75
+ "eval_loss": 0.4235016405582428,
76
+ "eval_precision": 0.5567010309278351,
77
+ "eval_recall": 0.8059701492537313,
78
+ "eval_runtime": 3.3736,
79
+ "eval_samples_per_second": 79.44,
80
+ "eval_steps_per_second": 1.482,
81
+ "step": 236
82
+ },
83
+ {
84
+ "epoch": 5.0,
85
+ "learning_rate": 9.940506627721576e-06,
86
+ "loss": 0.3225,
87
+ "step": 295
88
+ },
89
+ {
90
+ "epoch": 5.0,
91
+ "eval_accuracy": 0.8507462686567164,
92
+ "eval_f1": 0.5918367346938774,
93
+ "eval_loss": 0.47333377599716187,
94
+ "eval_precision": 0.9354838709677419,
95
+ "eval_recall": 0.43283582089552236,
96
+ "eval_runtime": 3.3745,
97
+ "eval_samples_per_second": 79.419,
98
+ "eval_steps_per_second": 1.482,
99
+ "step": 295
100
+ },
101
+ {
102
+ "epoch": 6.0,
103
+ "learning_rate": 9.913913232914188e-06,
104
+ "loss": 0.2246,
105
+ "step": 354
106
+ },
107
+ {
108
+ "epoch": 6.0,
109
+ "eval_accuracy": 0.8694029850746269,
110
+ "eval_f1": 0.7008547008547008,
111
+ "eval_loss": 0.3362283706665039,
112
+ "eval_precision": 0.82,
113
+ "eval_recall": 0.6119402985074627,
114
+ "eval_runtime": 3.3741,
115
+ "eval_samples_per_second": 79.428,
116
+ "eval_steps_per_second": 1.482,
117
+ "step": 354
118
+ },
119
+ {
120
+ "epoch": 7.0,
121
+ "learning_rate": 9.882470399009847e-06,
122
+ "loss": 0.166,
123
+ "step": 413
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "eval_accuracy": 0.8768656716417911,
128
+ "eval_f1": 0.722689075630252,
129
+ "eval_loss": 0.36720752716064453,
130
+ "eval_precision": 0.8269230769230769,
131
+ "eval_recall": 0.6417910447761194,
132
+ "eval_runtime": 3.3726,
133
+ "eval_samples_per_second": 79.464,
134
+ "eval_steps_per_second": 1.483,
135
+ "step": 413
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "learning_rate": 9.846863705058136e-06,
140
+ "loss": 0.0989,
141
+ "step": 472
142
+ },
143
+ {
144
+ "epoch": 8.0,
145
+ "eval_accuracy": 0.8768656716417911,
146
+ "eval_f1": 0.7625899280575541,
147
+ "eval_loss": 0.38347017765045166,
148
+ "eval_precision": 0.7361111111111112,
149
+ "eval_recall": 0.7910447761194029,
150
+ "eval_runtime": 3.3739,
151
+ "eval_samples_per_second": 79.433,
152
+ "eval_steps_per_second": 1.482,
153
+ "step": 472
154
+ },
155
+ {
156
+ "epoch": 9.0,
157
+ "learning_rate": 9.805900576231358e-06,
158
+ "loss": 0.0797,
159
+ "step": 531
160
+ },
161
+ {
162
+ "epoch": 9.0,
163
+ "eval_accuracy": 0.8992537313432836,
164
+ "eval_f1": 0.7938931297709924,
165
+ "eval_loss": 0.43790221214294434,
166
+ "eval_precision": 0.8125,
167
+ "eval_recall": 0.7761194029850746,
168
+ "eval_runtime": 3.3735,
169
+ "eval_samples_per_second": 79.442,
170
+ "eval_steps_per_second": 1.482,
171
+ "step": 531
172
+ },
173
+ {
174
+ "epoch": 10.0,
175
+ "learning_rate": 9.760194603759247e-06,
176
+ "loss": 0.08,
177
+ "step": 590
178
+ },
179
+ {
180
+ "epoch": 10.0,
181
+ "eval_accuracy": 0.8544776119402985,
182
+ "eval_f1": 0.7450980392156863,
183
+ "eval_loss": 0.7676528096199036,
184
+ "eval_precision": 0.6627906976744186,
185
+ "eval_recall": 0.8507462686567164,
186
+ "eval_runtime": 3.3728,
187
+ "eval_samples_per_second": 79.46,
188
+ "eval_steps_per_second": 1.482,
189
+ "step": 590
190
+ },
191
+ {
192
+ "epoch": 11.0,
193
+ "learning_rate": 9.709790893918487e-06,
194
+ "loss": 0.0505,
195
+ "step": 649
196
+ },
197
+ {
198
+ "epoch": 11.0,
199
+ "eval_accuracy": 0.8805970149253731,
200
+ "eval_f1": 0.7288135593220338,
201
+ "eval_loss": 0.7316186428070068,
202
+ "eval_precision": 0.8431372549019608,
203
+ "eval_recall": 0.6417910447761194,
204
+ "eval_runtime": 3.3708,
205
+ "eval_samples_per_second": 79.507,
206
+ "eval_steps_per_second": 1.483,
207
+ "step": 649
208
+ },
209
+ {
210
+ "epoch": 12.0,
211
+ "learning_rate": 9.654739189085373e-06,
212
+ "loss": 0.073,
213
+ "step": 708
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "eval_accuracy": 0.9104477611940298,
218
+ "eval_f1": 0.8181818181818182,
219
+ "eval_loss": 0.47964057326316833,
220
+ "eval_precision": 0.8307692307692308,
221
+ "eval_recall": 0.8059701492537313,
222
+ "eval_runtime": 3.372,
223
+ "eval_samples_per_second": 79.477,
224
+ "eval_steps_per_second": 1.483,
225
+ "step": 708
226
+ },
227
+ {
228
+ "epoch": 13.0,
229
+ "learning_rate": 9.595093818646103e-06,
230
+ "loss": 0.05,
231
+ "step": 767
232
+ },
233
+ {
234
+ "epoch": 13.0,
235
+ "eval_accuracy": 0.8694029850746269,
236
+ "eval_f1": 0.7058823529411765,
237
+ "eval_loss": 0.8468834757804871,
238
+ "eval_precision": 0.8076923076923077,
239
+ "eval_recall": 0.6268656716417911,
240
+ "eval_runtime": 3.3726,
241
+ "eval_samples_per_second": 79.464,
242
+ "eval_steps_per_second": 1.483,
243
+ "step": 767
244
+ },
245
+ {
246
+ "epoch": 14.0,
247
+ "learning_rate": 9.530913645380233e-06,
248
+ "loss": 0.0583,
249
+ "step": 826
250
+ },
251
+ {
252
+ "epoch": 14.0,
253
+ "eval_accuracy": 0.8917910447761194,
254
+ "eval_f1": 0.7563025210084034,
255
+ "eval_loss": 0.7265912294387817,
256
+ "eval_precision": 0.8653846153846154,
257
+ "eval_recall": 0.6716417910447762,
258
+ "eval_runtime": 3.3727,
259
+ "eval_samples_per_second": 79.462,
260
+ "eval_steps_per_second": 1.483,
261
+ "step": 826
262
+ },
263
+ {
264
+ "epoch": 15.0,
265
+ "learning_rate": 9.462262007370205e-06,
266
+ "loss": 0.0275,
267
+ "step": 885
268
+ },
269
+ {
270
+ "epoch": 15.0,
271
+ "eval_accuracy": 0.8917910447761194,
272
+ "eval_f1": 0.7387387387387387,
273
+ "eval_loss": 0.897386908531189,
274
+ "eval_precision": 0.9318181818181818,
275
+ "eval_recall": 0.6119402985074627,
276
+ "eval_runtime": 3.3738,
277
+ "eval_samples_per_second": 79.437,
278
+ "eval_steps_per_second": 1.482,
279
+ "step": 885
280
+ },
281
+ {
282
+ "epoch": 15.0,
283
+ "step": 885,
284
+ "total_flos": 6563126626237440.0,
285
+ "train_loss": 0.21473971275286485,
286
+ "train_runtime": 1465.663,
287
+ "train_samples_per_second": 128.133,
288
+ "train_steps_per_second": 4.025
289
+ }
290
+ ],
291
+ "logging_steps": 500,
292
+ "max_steps": 5900,
293
+ "num_input_tokens_seen": 0,
294
+ "num_train_epochs": 100,
295
+ "save_steps": 500,
296
+ "total_flos": 6563126626237440.0,
297
+ "train_batch_size": 32,
298
+ "trial_name": null,
299
+ "trial_params": null
300
+ }