apwic commited on
Commit
34c0a60
1 Parent(s): 66a693b

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9070227497527201,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8947368421052632,
5
- "eval_f1": 0.8723802851224565,
6
- "eval_loss": 0.31320512294769287,
7
- "eval_precision": 0.8743110838946724,
8
- "eval_recall": 0.8705219130751045,
9
- "eval_runtime": 4.938,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 80.802,
12
- "eval_steps_per_second": 10.126,
13
- "f1": 0.8883990642967194,
14
- "precision": 0.8876511954992967,
15
- "recall": 0.8891582028860001,
16
- "train_loss": 0.232629602463519,
17
- "train_runtime": 1905.8691,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 38.177,
20
- "train_steps_per_second": 1.28
21
  }
 
1
  {
2
+ "accuracy": 0.9030662710187932,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8872180451127819,
5
+ "eval_f1": 0.8622036668943447,
6
+ "eval_loss": 0.32483023405075073,
7
+ "eval_precision": 0.8671602787456446,
8
+ "eval_recall": 0.8577014002545917,
9
+ "eval_runtime": 1.7849,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 223.546,
12
+ "eval_steps_per_second": 28.013,
13
+ "f1": 0.8838748241912799,
14
+ "precision": 0.8824176388720238,
15
+ "recall": 0.8853765637207376,
16
+ "train_loss": 0.2495564101172275,
17
+ "train_runtime": 624.4298,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 116.522,
20
+ "train_steps_per_second": 3.908
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8947368421052632,
4
- "eval_f1": 0.8723802851224565,
5
- "eval_loss": 0.31320512294769287,
6
- "eval_precision": 0.8743110838946724,
7
- "eval_recall": 0.8705219130751045,
8
- "eval_runtime": 4.938,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 80.802,
11
- "eval_steps_per_second": 10.126
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8872180451127819,
4
+ "eval_f1": 0.8622036668943447,
5
+ "eval_loss": 0.32483023405075073,
6
+ "eval_precision": 0.8671602787456446,
7
+ "eval_recall": 0.8577014002545917,
8
+ "eval_runtime": 1.7849,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 223.546,
11
+ "eval_steps_per_second": 28.013
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9070227497527201,
3
- "f1": 0.8883990642967194,
4
- "precision": 0.8876511954992967,
5
- "recall": 0.8891582028860001
6
  }
 
1
  {
2
+ "accuracy": 0.9030662710187932,
3
+ "f1": 0.8838748241912799,
4
+ "precision": 0.8824176388720238,
5
+ "recall": 0.8853765637207376
6
  }
predict_results.txt CHANGED
@@ -29,7 +29,7 @@ index prediction
29
  27 1
30
  28 1
31
  29 1
32
- 30 0
33
  31 0
34
  32 1
35
  33 1
@@ -38,7 +38,7 @@ index prediction
38
  36 0
39
  37 1
40
  38 1
41
- 39 1
42
  40 1
43
  41 1
44
  42 1
@@ -56,7 +56,7 @@ index prediction
56
  54 1
57
  55 1
58
  56 1
59
- 57 0
60
  58 1
61
  59 1
62
  60 1
@@ -86,7 +86,7 @@ index prediction
86
  84 0
87
  85 1
88
  86 1
89
- 87 1
90
  88 1
91
  89 1
92
  90 1
@@ -98,15 +98,15 @@ index prediction
98
  96 1
99
  97 1
100
  98 1
101
- 99 0
102
  100 1
103
- 101 0
104
  102 1
105
  103 1
106
  104 1
107
- 105 0
108
  106 1
109
- 107 0
110
  108 1
111
  109 1
112
  110 0
@@ -120,7 +120,7 @@ index prediction
120
  118 1
121
  119 1
122
  120 1
123
- 121 1
124
  122 1
125
  123 0
126
  124 1
@@ -130,7 +130,7 @@ index prediction
130
  128 1
131
  129 1
132
  130 1
133
- 131 1
134
  132 1
135
  133 1
136
  134 1
@@ -152,7 +152,7 @@ index prediction
152
  150 1
153
  151 1
154
  152 1
155
- 153 1
156
  154 1
157
  155 1
158
  156 1
@@ -164,7 +164,7 @@ index prediction
164
  162 1
165
  163 1
166
  164 0
167
- 165 1
168
  166 1
169
  167 1
170
  168 1
@@ -173,10 +173,10 @@ index prediction
173
  171 1
174
  172 1
175
  173 1
176
- 174 1
177
  175 1
178
  176 1
179
- 177 0
180
  178 1
181
  179 1
182
  180 1
@@ -184,7 +184,7 @@ index prediction
184
  182 1
185
  183 1
186
  184 1
187
- 185 1
188
  186 1
189
  187 1
190
  188 1
@@ -206,7 +206,7 @@ index prediction
206
  204 0
207
  205 1
208
  206 1
209
- 207 1
210
  208 1
211
  209 1
212
  210 1
@@ -217,7 +217,7 @@ index prediction
217
  215 0
218
  216 1
219
  217 0
220
- 218 1
221
  219 1
222
  220 1
223
  221 1
@@ -239,19 +239,19 @@ index prediction
239
  237 1
240
  238 0
241
  239 1
242
- 240 0
243
  241 1
244
  242 1
245
  243 1
246
  244 1
247
- 245 0
248
  246 1
249
  247 1
250
  248 1
251
  249 1
252
  250 0
253
  251 0
254
- 252 0
255
  253 1
256
  254 1
257
  255 1
@@ -279,7 +279,7 @@ index prediction
279
  277 1
280
  278 1
281
  279 1
282
- 280 1
283
  281 1
284
  282 1
285
  283 1
@@ -322,7 +322,7 @@ index prediction
322
  320 0
323
  321 0
324
  322 0
325
- 323 1
326
  324 0
327
  325 0
328
  326 0
@@ -344,7 +344,7 @@ index prediction
344
  342 0
345
  343 0
346
  344 0
347
- 345 0
348
  346 0
349
  347 0
350
  348 0
@@ -365,7 +365,7 @@ index prediction
365
  363 0
366
  364 0
367
  365 0
368
- 366 1
369
  367 0
370
  368 0
371
  369 0
@@ -385,10 +385,10 @@ index prediction
385
  383 0
386
  384 0
387
  385 0
388
- 386 0
389
- 387 0
390
  388 0
391
- 389 1
392
  390 0
393
  391 0
394
  392 0
@@ -440,7 +440,7 @@ index prediction
440
  438 0
441
  439 0
442
  440 0
443
- 441 1
444
  442 0
445
  443 0
446
  444 0
@@ -472,7 +472,7 @@ index prediction
472
  470 0
473
  471 0
474
  472 0
475
- 473 0
476
  474 0
477
  475 0
478
  476 0
@@ -496,8 +496,8 @@ index prediction
496
  494 0
497
  495 0
498
  496 0
499
- 497 1
500
- 498 1
501
  499 0
502
  500 0
503
  501 0
@@ -525,14 +525,14 @@ index prediction
525
  523 0
526
  524 0
527
  525 0
528
- 526 0
529
  527 0
530
  528 0
531
  529 0
532
  530 0
533
- 531 0
534
  532 0
535
- 533 0
536
  534 0
537
  535 0
538
  536 0
@@ -561,7 +561,7 @@ index prediction
561
  559 0
562
  560 0
563
  561 0
564
- 562 1
565
  563 0
566
  564 0
567
  565 0
@@ -580,9 +580,9 @@ index prediction
580
  578 0
581
  579 0
582
  580 0
583
- 581 1
584
  582 0
585
- 583 0
586
  584 0
587
  585 0
588
  586 0
@@ -594,7 +594,7 @@ index prediction
594
  592 0
595
  593 0
596
  594 0
597
- 595 0
598
  596 0
599
  597 0
600
  598 0
@@ -604,15 +604,15 @@ index prediction
604
  602 0
605
  603 1
606
  604 0
607
- 605 0
608
  606 0
609
  607 0
610
  608 0
611
  609 0
612
  610 0
613
- 611 0
614
  612 0
615
- 613 1
616
  614 0
617
  615 0
618
  616 0
@@ -622,7 +622,7 @@ index prediction
622
  620 0
623
  621 0
624
  622 0
625
- 623 0
626
  624 0
627
  625 1
628
  626 0
@@ -662,9 +662,9 @@ index prediction
662
  660 0
663
  661 0
664
  662 0
665
- 663 1
666
  664 1
667
- 665 0
668
  666 0
669
  667 0
670
  668 0
@@ -685,7 +685,7 @@ index prediction
685
  683 0
686
  684 0
687
  685 0
688
- 686 1
689
  687 0
690
  688 0
691
  689 0
@@ -705,7 +705,7 @@ index prediction
705
  703 0
706
  704 0
707
  705 0
708
- 706 1
709
  707 0
710
  708 0
711
  709 0
@@ -765,12 +765,12 @@ index prediction
765
  763 0
766
  764 0
767
  765 0
768
- 766 0
769
  767 0
770
  768 0
771
  769 0
772
  770 0
773
- 771 1
774
  772 0
775
  773 0
776
  774 0
@@ -840,7 +840,7 @@ index prediction
840
  838 0
841
  839 0
842
  840 0
843
- 841 1
844
  842 1
845
  843 0
846
  844 0
@@ -859,7 +859,7 @@ index prediction
859
  857 0
860
  858 0
861
  859 0
862
- 860 0
863
  861 0
864
  862 0
865
  863 0
@@ -968,19 +968,19 @@ index prediction
968
  966 1
969
  967 0
970
  968 0
971
- 969 0
972
  970 0
973
  971 0
974
  972 0
975
  973 0
976
- 974 0
977
  975 0
978
  976 0
979
  977 0
980
  978 0
981
  979 0
982
  980 1
983
- 981 1
984
  982 0
985
  983 0
986
  984 0
@@ -998,7 +998,7 @@ index prediction
998
  996 0
999
  997 0
1000
  998 0
1001
- 999 0
1002
  1000 0
1003
  1001 0
1004
  1002 0
 
29
  27 1
30
  28 1
31
  29 1
32
+ 30 1
33
  31 0
34
  32 1
35
  33 1
 
38
  36 0
39
  37 1
40
  38 1
41
+ 39 0
42
  40 1
43
  41 1
44
  42 1
 
56
  54 1
57
  55 1
58
  56 1
59
+ 57 1
60
  58 1
61
  59 1
62
  60 1
 
86
  84 0
87
  85 1
88
  86 1
89
+ 87 0
90
  88 1
91
  89 1
92
  90 1
 
98
  96 1
99
  97 1
100
  98 1
101
+ 99 1
102
  100 1
103
+ 101 1
104
  102 1
105
  103 1
106
  104 1
107
+ 105 1
108
  106 1
109
+ 107 1
110
  108 1
111
  109 1
112
  110 0
 
120
  118 1
121
  119 1
122
  120 1
123
+ 121 0
124
  122 1
125
  123 0
126
  124 1
 
130
  128 1
131
  129 1
132
  130 1
133
+ 131 0
134
  132 1
135
  133 1
136
  134 1
 
152
  150 1
153
  151 1
154
  152 1
155
+ 153 0
156
  154 1
157
  155 1
158
  156 1
 
164
  162 1
165
  163 1
166
  164 0
167
+ 165 0
168
  166 1
169
  167 1
170
  168 1
 
173
  171 1
174
  172 1
175
  173 1
176
+ 174 0
177
  175 1
178
  176 1
179
+ 177 1
180
  178 1
181
  179 1
182
  180 1
 
184
  182 1
185
  183 1
186
  184 1
187
+ 185 0
188
  186 1
189
  187 1
190
  188 1
 
206
  204 0
207
  205 1
208
  206 1
209
+ 207 0
210
  208 1
211
  209 1
212
  210 1
 
217
  215 0
218
  216 1
219
  217 0
220
+ 218 0
221
  219 1
222
  220 1
223
  221 1
 
239
  237 1
240
  238 0
241
  239 1
242
+ 240 1
243
  241 1
244
  242 1
245
  243 1
246
  244 1
247
+ 245 1
248
  246 1
249
  247 1
250
  248 1
251
  249 1
252
  250 0
253
  251 0
254
+ 252 1
255
  253 1
256
  254 1
257
  255 1
 
279
  277 1
280
  278 1
281
  279 1
282
+ 280 0
283
  281 1
284
  282 1
285
  283 1
 
322
  320 0
323
  321 0
324
  322 0
325
+ 323 0
326
  324 0
327
  325 0
328
  326 0
 
344
  342 0
345
  343 0
346
  344 0
347
+ 345 1
348
  346 0
349
  347 0
350
  348 0
 
365
  363 0
366
  364 0
367
  365 0
368
+ 366 0
369
  367 0
370
  368 0
371
  369 0
 
385
  383 0
386
  384 0
387
  385 0
388
+ 386 1
389
+ 387 1
390
  388 0
391
+ 389 0
392
  390 0
393
  391 0
394
  392 0
 
440
  438 0
441
  439 0
442
  440 0
443
+ 441 0
444
  442 0
445
  443 0
446
  444 0
 
472
  470 0
473
  471 0
474
  472 0
475
+ 473 1
476
  474 0
477
  475 0
478
  476 0
 
496
  494 0
497
  495 0
498
  496 0
499
+ 497 0
500
+ 498 0
501
  499 0
502
  500 0
503
  501 0
 
525
  523 0
526
  524 0
527
  525 0
528
+ 526 1
529
  527 0
530
  528 0
531
  529 0
532
  530 0
533
+ 531 1
534
  532 0
535
+ 533 1
536
  534 0
537
  535 0
538
  536 0
 
561
  559 0
562
  560 0
563
  561 0
564
+ 562 0
565
  563 0
566
  564 0
567
  565 0
 
580
  578 0
581
  579 0
582
  580 0
583
+ 581 0
584
  582 0
585
+ 583 1
586
  584 0
587
  585 0
588
  586 0
 
594
  592 0
595
  593 0
596
  594 0
597
+ 595 1
598
  596 0
599
  597 0
600
  598 0
 
604
  602 0
605
  603 1
606
  604 0
607
+ 605 1
608
  606 0
609
  607 0
610
  608 0
611
  609 0
612
  610 0
613
+ 611 1
614
  612 0
615
+ 613 0
616
  614 0
617
  615 0
618
  616 0
 
622
  620 0
623
  621 0
624
  622 0
625
+ 623 1
626
  624 0
627
  625 1
628
  626 0
 
662
  660 0
663
  661 0
664
  662 0
665
+ 663 0
666
  664 1
667
+ 665 1
668
  666 0
669
  667 0
670
  668 0
 
685
  683 0
686
  684 0
687
  685 0
688
+ 686 0
689
  687 0
690
  688 0
691
  689 0
 
705
  703 0
706
  704 0
707
  705 0
708
+ 706 0
709
  707 0
710
  708 0
711
  709 0
 
765
  763 0
766
  764 0
767
  765 0
768
+ 766 1
769
  767 0
770
  768 0
771
  769 0
772
  770 0
773
+ 771 0
774
  772 0
775
  773 0
776
  774 0
 
840
  838 0
841
  839 0
842
  840 0
843
+ 841 0
844
  842 1
845
  843 0
846
  844 0
 
859
  857 0
860
  858 0
861
  859 0
862
+ 860 1
863
  861 0
864
  862 0
865
  863 0
 
968
  966 1
969
  967 0
970
  968 0
971
+ 969 1
972
  970 0
973
  971 0
974
  972 0
975
  973 0
976
+ 974 1
977
  975 0
978
  976 0
979
  977 0
980
  978 0
981
  979 0
982
  980 1
983
+ 981 0
984
  982 0
985
  983 0
986
  984 0
 
998
  996 0
999
  997 0
1000
  998 0
1001
+ 999 1
1002
  1000 0
1003
  1001 0
1004
  1002 0
runs/Jun03_13-55-30_a358b85c7679/events.out.tfevents.1717423570.a358b85c7679.123191.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec63af5327041cb52e73510de6fa75b1fc806de4b66867881961bb3c4dc11ce5
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.232629602463519,
4
- "train_runtime": 1905.8691,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 38.177,
7
- "train_steps_per_second": 1.28
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.2495564101172275,
4
+ "train_runtime": 624.4298,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 116.522,
7
+ "train_steps_per_second": 3.908
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.3584418296813965,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5494,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.731829573934837,
21
- "eval_f1": 0.6369179742314071,
22
- "eval_loss": 0.5032888650894165,
23
- "eval_precision": 0.6683441169422477,
24
- "eval_recall": 0.6277505000909256,
25
- "eval_runtime": 5.0519,
26
- "eval_samples_per_second": 78.98,
27
- "eval_steps_per_second": 9.897,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 4.557066440582275,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4551,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7769423558897243,
40
- "eval_f1": 0.7522344322344323,
41
- "eval_loss": 0.4275681674480438,
42
- "eval_precision": 0.7434314656536879,
43
- "eval_recall": 0.7796872158574286,
44
- "eval_runtime": 5.0559,
45
- "eval_samples_per_second": 78.917,
46
- "eval_steps_per_second": 9.889,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 3.5165185928344727,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3731,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8395989974937343,
59
- "eval_f1": 0.7878928832480564,
60
- "eval_loss": 0.35077065229415894,
61
- "eval_precision": 0.8294492319220682,
62
- "eval_recall": 0.7665030005455538,
63
- "eval_runtime": 5.0754,
64
- "eval_samples_per_second": 78.614,
65
- "eval_steps_per_second": 9.851,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 3.075305461883545,
71
  "learning_rate": 4e-05,
72
- "loss": 0.3032,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8671679197994987,
78
- "eval_f1": 0.8466330637850383,
79
- "eval_loss": 0.3236788511276245,
80
- "eval_precision": 0.8353276671885485,
81
- "eval_recall": 0.8635206401163849,
82
- "eval_runtime": 5.0489,
83
- "eval_samples_per_second": 79.027,
84
- "eval_steps_per_second": 9.903,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.069729804992676,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2718,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8721804511278195,
97
- "eval_f1": 0.8469505178365937,
98
- "eval_loss": 0.31016021966934204,
99
- "eval_precision": 0.844489247311828,
100
- "eval_recall": 0.8495635570103655,
101
- "eval_runtime": 5.0508,
102
- "eval_samples_per_second": 78.998,
103
- "eval_steps_per_second": 9.9,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.1438698768615723,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2642,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.87468671679198,
116
- "eval_f1": 0.8524146298159436,
117
- "eval_loss": 0.30064335465431213,
118
- "eval_precision": 0.8451250578971746,
119
- "eval_recall": 0.8613384251682124,
120
- "eval_runtime": 5.048,
121
- "eval_samples_per_second": 79.041,
122
- "eval_steps_per_second": 9.905,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.622890830039978,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2394,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8721804511278195,
135
- "eval_f1": 0.8404212771630449,
136
- "eval_loss": 0.3013169765472412,
137
- "eval_precision": 0.854416558018253,
138
- "eval_recall": 0.8295599199854519,
139
- "eval_runtime": 5.0554,
140
- "eval_samples_per_second": 78.925,
141
- "eval_steps_per_second": 9.89,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 2.1770668029785156,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2234,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8796992481203008,
154
- "eval_f1": 0.8533986527862829,
155
- "eval_loss": 0.290397047996521,
156
- "eval_precision": 0.8572003218020917,
157
- "eval_recall": 0.8498817966903074,
158
- "eval_runtime": 5.0587,
159
- "eval_samples_per_second": 78.874,
160
- "eval_steps_per_second": 9.884,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 8.559947967529297,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.2098,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8897243107769424,
173
- "eval_f1": 0.8701248742380304,
174
- "eval_loss": 0.2983732223510742,
175
- "eval_precision": 0.8624507874015748,
176
- "eval_recall": 0.8794780869248955,
177
- "eval_runtime": 5.0499,
178
- "eval_samples_per_second": 79.011,
179
- "eval_steps_per_second": 9.901,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 2.985607385635376,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.2029,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8822055137844611,
192
- "eval_f1": 0.8495004213314072,
193
- "eval_loss": 0.31886106729507446,
194
- "eval_precision": 0.8761860561056105,
195
- "eval_recall": 0.8316512093107837,
196
- "eval_runtime": 5.0651,
197
- "eval_samples_per_second": 78.775,
198
- "eval_steps_per_second": 9.872,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.5187826156616211,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1917,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
  "eval_accuracy": 0.8847117794486216,
211
  "eval_f1": 0.8587719298245614,
212
- "eval_loss": 0.28483322262763977,
213
  "eval_precision": 0.864771021021021,
214
  "eval_recall": 0.8534278959810875,
215
- "eval_runtime": 5.0495,
216
- "eval_samples_per_second": 79.018,
217
- "eval_steps_per_second": 9.902,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 12.659485816955566,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1797,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8771929824561403,
230
- "eval_f1": 0.850729517396184,
231
- "eval_loss": 0.30030354857444763,
232
- "eval_precision": 0.8535087719298247,
233
- "eval_recall": 0.8481087470449173,
234
- "eval_runtime": 5.0516,
235
- "eval_samples_per_second": 78.985,
236
- "eval_steps_per_second": 9.898,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 2.4105708599090576,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1658,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8847117794486216,
249
- "eval_f1": 0.8595070422535211,
250
- "eval_loss": 0.3010116219520569,
251
- "eval_precision": 0.8633733523114054,
252
- "eval_recall": 0.8559283506092017,
253
- "eval_runtime": 5.0413,
254
- "eval_samples_per_second": 79.146,
255
- "eval_steps_per_second": 9.918,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 4.914756774902344,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1551,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8847117794486216,
268
- "eval_f1": 0.8622899159663866,
269
- "eval_loss": 0.3076874911785126,
270
- "eval_precision": 0.8589244307033712,
271
- "eval_recall": 0.8659301691216585,
272
- "eval_runtime": 5.0475,
273
- "eval_samples_per_second": 79.05,
274
- "eval_steps_per_second": 9.906,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 4.140334606170654,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1517,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8947368421052632,
287
- "eval_f1": 0.8703663593044124,
288
- "eval_loss": 0.30141717195510864,
289
- "eval_precision": 0.8789149003479912,
290
- "eval_recall": 0.8630205491907619,
291
- "eval_runtime": 5.0937,
292
- "eval_samples_per_second": 78.332,
293
- "eval_steps_per_second": 9.816,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.8993310332298279,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1532,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8947368421052632,
306
- "eval_f1": 0.8736504011098378,
307
- "eval_loss": 0.3066878616809845,
308
- "eval_precision": 0.8718487394957983,
309
- "eval_recall": 0.8755228223313329,
310
- "eval_runtime": 5.0442,
311
- "eval_samples_per_second": 79.1,
312
- "eval_steps_per_second": 9.912,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.4546063542366028,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.136,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8897243107769424,
325
- "eval_f1": 0.8669758137843244,
326
- "eval_loss": 0.3174092173576355,
327
- "eval_precision": 0.8669758137843244,
328
- "eval_recall": 0.8669758137843244,
329
- "eval_runtime": 5.0564,
330
- "eval_samples_per_second": 78.91,
331
- "eval_steps_per_second": 9.889,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 4.372452259063721,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1438,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8897243107769424,
344
- "eval_f1": 0.8663031558425733,
345
- "eval_loss": 0.31285080313682556,
346
- "eval_precision": 0.8682026944274341,
347
- "eval_recall": 0.8644753591562102,
348
- "eval_runtime": 5.0509,
349
- "eval_samples_per_second": 78.996,
350
- "eval_steps_per_second": 9.899,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 6.023264408111572,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1507,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8922305764411027,
363
- "eval_f1": 0.8683279483657071,
364
- "eval_loss": 0.316543847322464,
365
- "eval_precision": 0.873366724738676,
366
- "eval_recall": 0.863747954173486,
367
- "eval_runtime": 5.058,
368
- "eval_samples_per_second": 78.885,
369
- "eval_steps_per_second": 9.885,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 4.744778156280518,
375
  "learning_rate": 0.0,
376
- "loss": 0.1326,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8947368421052632,
382
- "eval_f1": 0.8723802851224565,
383
- "eval_loss": 0.31320512294769287,
384
- "eval_precision": 0.8743110838946724,
385
- "eval_recall": 0.8705219130751045,
386
- "eval_runtime": 5.0604,
387
- "eval_samples_per_second": 78.847,
388
- "eval_steps_per_second": 9.881,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
- "train_loss": 0.232629602463519,
396
- "train_runtime": 1905.8691,
397
- "train_samples_per_second": 38.177,
398
- "train_steps_per_second": 1.28
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.458889961242676,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5533,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7268170426065163,
21
+ "eval_f1": 0.6326680574676724,
22
+ "eval_loss": 0.5133728384971619,
23
+ "eval_precision": 0.6605799373040753,
24
+ "eval_recall": 0.6242044008001455,
25
+ "eval_runtime": 1.774,
26
+ "eval_samples_per_second": 224.915,
27
+ "eval_steps_per_second": 28.185,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.8158535957336426,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4779,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7418546365914787,
40
+ "eval_f1": 0.7122401394791937,
41
+ "eval_loss": 0.4949621260166168,
42
+ "eval_precision": 0.7053803339517626,
43
+ "eval_recall": 0.734860883797054,
44
+ "eval_runtime": 1.7746,
45
+ "eval_samples_per_second": 224.844,
46
+ "eval_steps_per_second": 28.176,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 3.156679630279541,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.4097,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8245614035087719,
59
+ "eval_f1": 0.7664715719063545,
60
+ "eval_loss": 0.3772188425064087,
61
+ "eval_precision": 0.8092877840475827,
62
+ "eval_recall": 0.7458628841607565,
63
+ "eval_runtime": 1.8118,
64
+ "eval_samples_per_second": 220.228,
65
+ "eval_steps_per_second": 27.597,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 3.239713191986084,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.3451,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8446115288220551,
78
+ "eval_f1": 0.8169941409717701,
79
+ "eval_loss": 0.3511227071285248,
80
+ "eval_precision": 0.8104735988883742,
81
+ "eval_recall": 0.8250591016548463,
82
+ "eval_runtime": 1.8097,
83
+ "eval_samples_per_second": 220.479,
84
+ "eval_steps_per_second": 27.629,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 2.155226707458496,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2959,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8546365914786967,
97
+ "eval_f1": 0.8255172205802521,
98
+ "eval_loss": 0.32013869285583496,
99
+ "eval_precision": 0.8239495798319327,
100
+ "eval_recall": 0.8271503909801782,
101
+ "eval_runtime": 1.81,
102
+ "eval_samples_per_second": 220.445,
103
+ "eval_steps_per_second": 27.625,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 2.97943115234375,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2727,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8646616541353384,
116
+ "eval_f1": 0.8447157518450185,
117
+ "eval_loss": 0.3176342844963074,
118
+ "eval_precision": 0.8325401217487549,
119
+ "eval_recall": 0.864248045099109,
120
+ "eval_runtime": 1.8128,
121
+ "eval_samples_per_second": 220.097,
122
+ "eval_steps_per_second": 27.581,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 1.0954539775848389,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2595,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.87468671679198,
135
+ "eval_f1": 0.8524146298159436,
136
+ "eval_loss": 0.2958522439002991,
137
+ "eval_precision": 0.8451250578971746,
138
+ "eval_recall": 0.8613384251682124,
139
+ "eval_runtime": 1.8171,
140
+ "eval_samples_per_second": 219.582,
141
+ "eval_steps_per_second": 27.516,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 6.516312122344971,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2409,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8897243107769424,
154
+ "eval_f1": 0.8649122807017544,
155
+ "eval_loss": 0.28329744935035706,
156
+ "eval_precision": 0.8710116366366366,
157
+ "eval_recall": 0.8594744498999818,
158
+ "eval_runtime": 1.8171,
159
+ "eval_samples_per_second": 219.577,
160
+ "eval_steps_per_second": 27.516,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 6.709987163543701,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2298,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8771929824561403,
173
+ "eval_f1": 0.850729517396184,
174
+ "eval_loss": 0.2893889546394348,
175
+ "eval_precision": 0.8535087719298247,
176
+ "eval_recall": 0.8481087470449173,
177
+ "eval_runtime": 1.8128,
178
+ "eval_samples_per_second": 220.099,
179
+ "eval_steps_per_second": 27.581,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 4.345912933349609,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.2221,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8872180451127819,
192
+ "eval_f1": 0.8614765038536611,
193
+ "eval_loss": 0.2884393632411957,
194
+ "eval_precision": 0.8686536646744258,
195
+ "eval_recall": 0.8552009456264775,
196
+ "eval_runtime": 1.8154,
197
+ "eval_samples_per_second": 219.786,
198
+ "eval_steps_per_second": 27.542,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 2.3872387409210205,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1986,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
  "eval_accuracy": 0.8847117794486216,
211
  "eval_f1": 0.8587719298245614,
212
+ "eval_loss": 0.2855367362499237,
213
  "eval_precision": 0.864771021021021,
214
  "eval_recall": 0.8534278959810875,
215
+ "eval_runtime": 1.8165,
216
+ "eval_samples_per_second": 219.656,
217
+ "eval_steps_per_second": 27.526,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 5.007177829742432,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1964,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8822055137844611,
230
+ "eval_f1": 0.8521068445832446,
231
+ "eval_loss": 0.29210031032562256,
232
+ "eval_precision": 0.8693800752624282,
233
+ "eval_recall": 0.8391525731951264,
234
+ "eval_runtime": 1.8152,
235
+ "eval_samples_per_second": 219.815,
236
+ "eval_steps_per_second": 27.546,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 1.4582099914550781,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1783,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8897243107769424,
249
+ "eval_f1": 0.8649122807017544,
250
+ "eval_loss": 0.3103856146335602,
251
+ "eval_precision": 0.8710116366366366,
252
+ "eval_recall": 0.8594744498999818,
253
+ "eval_runtime": 1.8138,
254
+ "eval_samples_per_second": 219.982,
255
+ "eval_steps_per_second": 27.567,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 6.738508701324463,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1788,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8897243107769424,
268
+ "eval_f1": 0.8689068100358424,
269
+ "eval_loss": 0.3015482425689697,
270
+ "eval_precision": 0.863953693884765,
271
+ "eval_recall": 0.8744771776686671,
272
+ "eval_runtime": 1.8152,
273
+ "eval_samples_per_second": 219.806,
274
+ "eval_steps_per_second": 27.545,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 2.6257522106170654,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.172,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8847117794486216,
287
+ "eval_f1": 0.8595070422535211,
288
+ "eval_loss": 0.3011764883995056,
289
+ "eval_precision": 0.8633733523114054,
290
+ "eval_recall": 0.8559283506092017,
291
+ "eval_runtime": 1.8131,
292
+ "eval_samples_per_second": 220.062,
293
+ "eval_steps_per_second": 27.577,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 4.563363552093506,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1563,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8897243107769424,
306
+ "eval_f1": 0.8695225637671682,
307
+ "eval_loss": 0.3159307837486267,
308
+ "eval_precision": 0.8631532846715328,
309
+ "eval_recall": 0.8769776322967813,
310
+ "eval_runtime": 1.8172,
311
+ "eval_samples_per_second": 219.574,
312
+ "eval_steps_per_second": 27.516,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 1.206107258796692,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1512,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8847117794486216,
325
+ "eval_f1": 0.8572517421602788,
326
+ "eval_loss": 0.32489535212516785,
327
+ "eval_precision": 0.8679426449878376,
328
+ "eval_recall": 0.8484269867248591,
329
+ "eval_runtime": 1.8131,
330
+ "eval_samples_per_second": 220.063,
331
+ "eval_steps_per_second": 27.577,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 4.8656744956970215,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.151,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8822055137844611,
344
+ "eval_f1": 0.855319904024935,
345
+ "eval_loss": 0.3245084285736084,
346
+ "eval_precision": 0.862378106322743,
347
+ "eval_recall": 0.8491543917075832,
348
+ "eval_runtime": 1.8159,
349
+ "eval_samples_per_second": 219.726,
350
+ "eval_steps_per_second": 27.535,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 3.600020170211792,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1461,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8872180451127819,
363
+ "eval_f1": 0.8614765038536611,
364
+ "eval_loss": 0.328171044588089,
365
+ "eval_precision": 0.8686536646744258,
366
+ "eval_recall": 0.8552009456264775,
367
+ "eval_runtime": 1.8138,
368
+ "eval_samples_per_second": 219.976,
369
+ "eval_steps_per_second": 27.566,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.394913673400879,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1555,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8872180451127819,
382
+ "eval_f1": 0.8622036668943447,
383
+ "eval_loss": 0.32483023405075073,
384
+ "eval_precision": 0.8671602787456446,
385
+ "eval_recall": 0.8577014002545917,
386
+ "eval_runtime": 1.8125,
387
+ "eval_samples_per_second": 220.138,
388
+ "eval_steps_per_second": 27.586,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
+ "train_loss": 0.2495564101172275,
396
+ "train_runtime": 624.4298,
397
+ "train_samples_per_second": 116.522,
398
+ "train_steps_per_second": 3.908
399
  }
400
  ],
401
  "logging_steps": 500,