apwic commited on
Commit
30f98a5
1 Parent(s): 9e8ac44

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.849624060150376,
4
- "eval_f1": 0.8176861216035092,
5
- "eval_loss": 0.32215815782546997,
6
- "eval_precision": 0.8193355786895284,
7
- "eval_recall": 0.8161029278050556,
8
- "eval_runtime": 4.9225,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 81.057,
11
- "eval_steps_per_second": 10.158,
12
- "train_loss": 0.39131999406658236,
13
- "train_runtime": 117.1148,
14
  "train_samples": 3638,
15
- "train_samples_per_second": 31.064,
16
- "train_steps_per_second": 1.042
17
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.9047619047619048,
4
+ "eval_f1": 0.8878351186601172,
5
+ "eval_loss": 0.7535876035690308,
6
+ "eval_precision": 0.879776516905975,
7
+ "eval_recall": 0.8976177486815784,
8
+ "eval_runtime": 4.6553,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 85.708,
11
+ "eval_steps_per_second": 10.74,
12
+ "train_loss": 0.06173487283655855,
13
+ "train_runtime": 2765.1299,
14
  "train_samples": 3638,
15
+ "train_samples_per_second": 26.313,
16
+ "train_steps_per_second": 0.882
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.849624060150376,
4
- "eval_f1": 0.8176861216035092,
5
- "eval_loss": 0.32215815782546997,
6
- "eval_precision": 0.8193355786895284,
7
- "eval_recall": 0.8161029278050556,
8
- "eval_runtime": 4.9225,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 81.057,
11
- "eval_steps_per_second": 10.158
12
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.9047619047619048,
4
+ "eval_f1": 0.8878351186601172,
5
+ "eval_loss": 0.7535876035690308,
6
+ "eval_precision": 0.879776516905975,
7
+ "eval_recall": 0.8976177486815784,
8
+ "eval_runtime": 4.6553,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 85.708,
11
+ "eval_steps_per_second": 10.74
12
  }
predict_results.txt CHANGED
@@ -1,20 +1,20 @@
1
  index prediction
2
  0 1
3
- 1 0
4
  2 1
5
  3 1
6
- 4 1
7
  5 1
8
  6 1
9
  7 1
10
  8 0
11
- 9 0
12
  10 1
13
  11 1
14
  12 1
15
  13 1
16
  14 1
17
- 15 1
18
  16 1
19
  17 1
20
  18 1
@@ -23,12 +23,12 @@ index prediction
23
  21 1
24
  22 1
25
  23 1
26
- 24 1
27
- 25 0
28
  26 1
29
- 27 0
30
  28 1
31
- 29 0
32
  30 1
33
  31 1
34
  32 1
@@ -38,20 +38,20 @@ index prediction
38
  36 1
39
  37 1
40
  38 1
41
- 39 0
42
  40 1
43
  41 1
44
  42 1
45
- 43 0
46
- 44 0
47
- 45 0
48
- 46 0
49
  47 1
50
  48 1
51
  49 0
52
  50 1
53
  51 1
54
- 52 0
55
  53 1
56
  54 1
57
  55 1
@@ -63,7 +63,7 @@ index prediction
63
  61 1
64
  62 1
65
  63 1
66
- 64 0
67
  65 1
68
  66 1
69
  67 1
@@ -78,8 +78,8 @@ index prediction
78
  76 1
79
  77 0
80
  78 1
81
- 79 0
82
- 80 0
83
  81 0
84
  82 1
85
  83 1
@@ -91,7 +91,7 @@ index prediction
91
  89 1
92
  90 1
93
  91 1
94
- 92 0
95
  93 1
96
  94 1
97
  95 1
@@ -104,7 +104,7 @@ index prediction
104
  102 1
105
  103 1
106
  104 1
107
- 105 0
108
  106 1
109
  107 1
110
  108 1
@@ -112,7 +112,7 @@ index prediction
112
  110 1
113
  111 1
114
  112 1
115
- 113 1
116
  114 1
117
  115 1
118
  116 1
@@ -143,12 +143,12 @@ index prediction
143
  141 1
144
  142 1
145
  143 1
146
- 144 0
147
  145 1
148
  146 1
149
  147 1
150
  148 1
151
- 149 0
152
  150 1
153
  151 1
154
  152 1
@@ -163,24 +163,24 @@ index prediction
163
  161 1
164
  162 1
165
  163 1
166
- 164 0
167
  165 0
168
  166 1
169
  167 1
170
  168 1
171
- 169 0
172
- 170 0
173
  171 1
174
- 172 0
175
  173 0
176
  174 1
177
  175 1
178
- 176 0
179
  177 0
180
  178 1
181
  179 1
182
  180 1
183
- 181 1
184
  182 1
185
  183 1
186
  184 1
@@ -189,7 +189,7 @@ index prediction
189
  187 1
190
  188 1
191
  189 1
192
- 190 0
193
  191 1
194
  192 1
195
  193 1
@@ -197,7 +197,7 @@ index prediction
197
  195 1
198
  196 1
199
  197 1
200
- 198 0
201
  199 0
202
  200 1
203
  201 1
@@ -209,15 +209,15 @@ index prediction
209
  207 1
210
  208 1
211
  209 1
212
- 210 0
213
  211 1
214
  212 1
215
  213 1
216
  214 0
217
  215 1
218
  216 0
219
- 217 1
220
- 218 0
221
  219 1
222
  220 0
223
  221 1
@@ -227,11 +227,11 @@ index prediction
227
  225 1
228
  226 0
229
  227 0
230
- 228 0
231
- 229 0
232
- 230 0
233
  231 1
234
- 232 0
235
  233 1
236
  234 1
237
  235 1
@@ -245,10 +245,10 @@ index prediction
245
  243 1
246
  244 1
247
  245 1
248
- 246 1
249
  247 1
250
- 248 0
251
- 249 1
252
  250 0
253
  251 1
254
  252 1
@@ -296,7 +296,7 @@ index prediction
296
  294 1
297
  295 1
298
  296 1
299
- 297 1
300
  298 0
301
  299 0
302
  300 0
@@ -316,7 +316,7 @@ index prediction
316
  314 0
317
  315 0
318
  316 0
319
- 317 1
320
  318 1
321
  319 0
322
  320 0
@@ -327,17 +327,17 @@ index prediction
327
  325 0
328
  326 0
329
  327 0
330
- 328 0
331
  329 0
332
  330 1
333
- 331 0
334
  332 0
335
  333 0
336
  334 0
337
  335 0
338
  336 0
339
  337 0
340
- 338 0
341
  339 0
342
  340 0
343
  341 0
@@ -350,7 +350,7 @@ index prediction
350
  348 0
351
  349 0
352
  350 0
353
- 351 1
354
  352 0
355
  353 0
356
  354 0
@@ -363,9 +363,9 @@ index prediction
363
  361 0
364
  362 0
365
  363 0
366
- 364 0
367
  365 0
368
- 366 0
369
  367 0
370
  368 0
371
  369 0
@@ -375,7 +375,7 @@ index prediction
375
  373 0
376
  374 0
377
  375 0
378
- 376 1
379
  377 0
380
  378 0
381
  379 0
@@ -400,8 +400,8 @@ index prediction
400
  398 1
401
  399 0
402
  400 0
403
- 401 1
404
- 402 0
405
  403 0
406
  404 0
407
  405 0
@@ -419,7 +419,7 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 0
423
  421 0
424
  422 0
425
  423 0
@@ -446,12 +446,12 @@ index prediction
446
  444 0
447
  445 0
448
  446 0
449
- 447 0
450
  448 0
451
  449 0
452
  450 0
453
  451 0
454
- 452 1
455
  453 0
456
  454 0
457
  455 0
@@ -517,7 +517,7 @@ index prediction
517
  515 0
518
  516 0
519
  517 0
520
- 518 1
521
  519 0
522
  520 0
523
  521 0
@@ -535,13 +535,13 @@ index prediction
535
  533 0
536
  534 0
537
  535 0
538
- 536 0
539
  537 0
540
  538 0
541
  539 0
542
  540 0
543
  541 0
544
- 542 1
545
  543 0
546
  544 0
547
  545 0
@@ -587,7 +587,7 @@ index prediction
587
  585 0
588
  586 0
589
  587 0
590
- 588 1
591
  589 0
592
  590 0
593
  591 0
@@ -595,7 +595,7 @@ index prediction
595
  593 0
596
  594 0
597
  595 0
598
- 596 1
599
  597 0
600
  598 0
601
  599 0
@@ -607,10 +607,10 @@ index prediction
607
  605 0
608
  606 0
609
  607 0
610
- 608 0
611
  609 0
612
  610 1
613
- 611 1
614
  612 0
615
  613 0
616
  614 0
@@ -631,10 +631,10 @@ index prediction
631
  629 0
632
  630 0
633
  631 0
634
- 632 0
635
  633 1
636
  634 0
637
- 635 1
638
  636 0
639
  637 0
640
  638 0
@@ -662,7 +662,7 @@ index prediction
662
  660 0
663
  661 0
664
  662 0
665
- 663 0
666
  664 0
667
  665 0
668
  666 0
@@ -702,7 +702,7 @@ index prediction
702
  700 0
703
  701 0
704
  702 0
705
- 703 0
706
  704 0
707
  705 0
708
  706 0
@@ -751,7 +751,7 @@ index prediction
751
  749 0
752
  750 0
753
  751 0
754
- 752 0
755
  753 0
756
  754 0
757
  755 0
@@ -764,12 +764,12 @@ index prediction
764
  762 0
765
  763 0
766
  764 0
767
- 765 1
768
  766 0
769
  767 0
770
  768 0
771
  769 0
772
- 770 0
773
  771 0
774
  772 0
775
  773 0
@@ -794,7 +794,7 @@ index prediction
794
  792 0
795
  793 0
796
  794 0
797
- 795 0
798
  796 0
799
  797 0
800
  798 0
@@ -808,7 +808,7 @@ index prediction
808
  806 0
809
  807 0
810
  808 0
811
- 809 1
812
  810 0
813
  811 0
814
  812 0
@@ -864,10 +864,10 @@ index prediction
864
  862 0
865
  863 0
866
  864 0
867
- 865 1
868
  866 0
869
  867 0
870
- 868 0
871
  869 0
872
  870 0
873
  871 0
@@ -883,20 +883,20 @@ index prediction
883
  881 0
884
  882 0
885
  883 0
886
- 884 1
887
  885 0
888
  886 0
889
  887 0
890
  888 0
891
  889 0
892
  890 0
893
- 891 0
894
  892 0
895
  893 0
896
- 894 1
897
  895 0
898
  896 0
899
- 897 1
900
  898 0
901
  899 0
902
  900 0
@@ -906,7 +906,7 @@ index prediction
906
  904 0
907
  905 0
908
  906 0
909
- 907 0
910
  908 0
911
  909 0
912
  910 0
@@ -949,12 +949,12 @@ index prediction
949
  947 0
950
  948 0
951
  949 0
952
- 950 1
953
  951 0
954
  952 0
955
  953 0
956
  954 0
957
- 955 0
958
  956 0
959
  957 0
960
  958 0
@@ -962,13 +962,13 @@ index prediction
962
  960 0
963
  961 0
964
  962 0
965
- 963 0
966
  964 0
967
  965 0
968
  966 0
969
  967 0
970
  968 0
971
- 969 1
972
  970 0
973
  971 0
974
  972 0
@@ -985,13 +985,13 @@ index prediction
985
  983 0
986
  984 0
987
  985 0
988
- 986 0
989
  987 0
990
  988 0
991
  989 0
992
  990 0
993
  991 1
994
- 992 1
995
  993 0
996
  994 0
997
  995 0
 
1
  index prediction
2
  0 1
3
+ 1 1
4
  2 1
5
  3 1
6
+ 4 0
7
  5 1
8
  6 1
9
  7 1
10
  8 0
11
+ 9 1
12
  10 1
13
  11 1
14
  12 1
15
  13 1
16
  14 1
17
+ 15 0
18
  16 1
19
  17 1
20
  18 1
 
23
  21 1
24
  22 1
25
  23 1
26
+ 24 0
27
+ 25 1
28
  26 1
29
+ 27 1
30
  28 1
31
+ 29 1
32
  30 1
33
  31 1
34
  32 1
 
38
  36 1
39
  37 1
40
  38 1
41
+ 39 1
42
  40 1
43
  41 1
44
  42 1
45
+ 43 1
46
+ 44 1
47
+ 45 1
48
+ 46 1
49
  47 1
50
  48 1
51
  49 0
52
  50 1
53
  51 1
54
+ 52 1
55
  53 1
56
  54 1
57
  55 1
 
63
  61 1
64
  62 1
65
  63 1
66
+ 64 1
67
  65 1
68
  66 1
69
  67 1
 
78
  76 1
79
  77 0
80
  78 1
81
+ 79 1
82
+ 80 1
83
  81 0
84
  82 1
85
  83 1
 
91
  89 1
92
  90 1
93
  91 1
94
+ 92 1
95
  93 1
96
  94 1
97
  95 1
 
104
  102 1
105
  103 1
106
  104 1
107
+ 105 1
108
  106 1
109
  107 1
110
  108 1
 
112
  110 1
113
  111 1
114
  112 1
115
+ 113 0
116
  114 1
117
  115 1
118
  116 1
 
143
  141 1
144
  142 1
145
  143 1
146
+ 144 1
147
  145 1
148
  146 1
149
  147 1
150
  148 1
151
+ 149 1
152
  150 1
153
  151 1
154
  152 1
 
163
  161 1
164
  162 1
165
  163 1
166
+ 164 1
167
  165 0
168
  166 1
169
  167 1
170
  168 1
171
+ 169 1
172
+ 170 1
173
  171 1
174
+ 172 1
175
  173 0
176
  174 1
177
  175 1
178
+ 176 1
179
  177 0
180
  178 1
181
  179 1
182
  180 1
183
+ 181 0
184
  182 1
185
  183 1
186
  184 1
 
189
  187 1
190
  188 1
191
  189 1
192
+ 190 1
193
  191 1
194
  192 1
195
  193 1
 
197
  195 1
198
  196 1
199
  197 1
200
+ 198 1
201
  199 0
202
  200 1
203
  201 1
 
209
  207 1
210
  208 1
211
  209 1
212
+ 210 1
213
  211 1
214
  212 1
215
  213 1
216
  214 0
217
  215 1
218
  216 0
219
+ 217 0
220
+ 218 1
221
  219 1
222
  220 0
223
  221 1
 
227
  225 1
228
  226 0
229
  227 0
230
+ 228 1
231
+ 229 1
232
+ 230 1
233
  231 1
234
+ 232 1
235
  233 1
236
  234 1
237
  235 1
 
245
  243 1
246
  244 1
247
  245 1
248
+ 246 0
249
  247 1
250
+ 248 1
251
+ 249 0
252
  250 0
253
  251 1
254
  252 1
 
296
  294 1
297
  295 1
298
  296 1
299
+ 297 0
300
  298 0
301
  299 0
302
  300 0
 
316
  314 0
317
  315 0
318
  316 0
319
+ 317 0
320
  318 1
321
  319 0
322
  320 0
 
327
  325 0
328
  326 0
329
  327 0
330
+ 328 1
331
  329 0
332
  330 1
333
+ 331 1
334
  332 0
335
  333 0
336
  334 0
337
  335 0
338
  336 0
339
  337 0
340
+ 338 1
341
  339 0
342
  340 0
343
  341 0
 
350
  348 0
351
  349 0
352
  350 0
353
+ 351 0
354
  352 0
355
  353 0
356
  354 0
 
363
  361 0
364
  362 0
365
  363 0
366
+ 364 1
367
  365 0
368
+ 366 1
369
  367 0
370
  368 0
371
  369 0
 
375
  373 0
376
  374 0
377
  375 0
378
+ 376 0
379
  377 0
380
  378 0
381
  379 0
 
400
  398 1
401
  399 0
402
  400 0
403
+ 401 0
404
+ 402 1
405
  403 0
406
  404 0
407
  405 0
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 1
423
  421 0
424
  422 0
425
  423 0
 
446
  444 0
447
  445 0
448
  446 0
449
+ 447 1
450
  448 0
451
  449 0
452
  450 0
453
  451 0
454
+ 452 0
455
  453 0
456
  454 0
457
  455 0
 
517
  515 0
518
  516 0
519
  517 0
520
+ 518 0
521
  519 0
522
  520 0
523
  521 0
 
535
  533 0
536
  534 0
537
  535 0
538
+ 536 1
539
  537 0
540
  538 0
541
  539 0
542
  540 0
543
  541 0
544
+ 542 0
545
  543 0
546
  544 0
547
  545 0
 
587
  585 0
588
  586 0
589
  587 0
590
+ 588 0
591
  589 0
592
  590 0
593
  591 0
 
595
  593 0
596
  594 0
597
  595 0
598
+ 596 0
599
  597 0
600
  598 0
601
  599 0
 
607
  605 0
608
  606 0
609
  607 0
610
+ 608 1
611
  609 0
612
  610 1
613
+ 611 0
614
  612 0
615
  613 0
616
  614 0
 
631
  629 0
632
  630 0
633
  631 0
634
+ 632 1
635
  633 1
636
  634 0
637
+ 635 0
638
  636 0
639
  637 0
640
  638 0
 
662
  660 0
663
  661 0
664
  662 0
665
+ 663 1
666
  664 0
667
  665 0
668
  666 0
 
702
  700 0
703
  701 0
704
  702 0
705
+ 703 1
706
  704 0
707
  705 0
708
  706 0
 
751
  749 0
752
  750 0
753
  751 0
754
+ 752 1
755
  753 0
756
  754 0
757
  755 0
 
764
  762 0
765
  763 0
766
  764 0
767
+ 765 0
768
  766 0
769
  767 0
770
  768 0
771
  769 0
772
+ 770 1
773
  771 0
774
  772 0
775
  773 0
 
794
  792 0
795
  793 0
796
  794 0
797
+ 795 1
798
  796 0
799
  797 0
800
  798 0
 
808
  806 0
809
  807 0
810
  808 0
811
+ 809 0
812
  810 0
813
  811 0
814
  812 0
 
864
  862 0
865
  863 0
866
  864 0
867
+ 865 0
868
  866 0
869
  867 0
870
+ 868 1
871
  869 0
872
  870 0
873
  871 0
 
883
  881 0
884
  882 0
885
  883 0
886
+ 884 0
887
  885 0
888
  886 0
889
  887 0
890
  888 0
891
  889 0
892
  890 0
893
+ 891 1
894
  892 0
895
  893 0
896
+ 894 0
897
  895 0
898
  896 0
899
+ 897 0
900
  898 0
901
  899 0
902
  900 0
 
906
  904 0
907
  905 0
908
  906 0
909
+ 907 1
910
  908 0
911
  909 0
912
  910 0
 
949
  947 0
950
  948 0
951
  949 0
952
+ 950 0
953
  951 0
954
  952 0
955
  953 0
956
  954 0
957
+ 955 1
958
  956 0
959
  957 0
960
  958 0
 
962
  960 0
963
  961 0
964
  962 0
965
+ 963 1
966
  964 0
967
  965 0
968
  966 0
969
  967 0
970
  968 0
971
+ 969 0
972
  970 0
973
  971 0
974
  972 0
 
985
  983 0
986
  984 0
987
  985 0
988
+ 986 1
989
  987 0
990
  988 0
991
  989 0
992
  990 0
993
  991 1
994
+ 992 0
995
  993 0
996
  994 0
997
  995 0
runs/May14_19-19-36_indolem-petl-vm/events.out.tfevents.1715717191.indolem-petl-vm.306885.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da2f2a468694ab3e44b6d4566861949e44aa1ddbbce5c0833bc08516b0b806de
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 0.39131999406658236,
4
- "train_runtime": 117.1148,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 31.064,
7
- "train_steps_per_second": 1.042
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "train_loss": 0.06173487283655855,
4
+ "train_runtime": 2765.1299,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 26.313,
7
+ "train_steps_per_second": 0.882
8
  }
trainer_state.json CHANGED
@@ -1,48 +1,409 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 122,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 11.240641593933105,
14
- "learning_rate": 0.0,
15
- "loss": 0.3913,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.849624060150376,
21
- "eval_f1": 0.8176861216035092,
22
- "eval_loss": 0.32215815782546997,
23
- "eval_precision": 0.8193355786895284,
24
- "eval_recall": 0.8161029278050556,
25
- "eval_runtime": 4.8398,
26
- "eval_samples_per_second": 82.441,
27
- "eval_steps_per_second": 10.331,
28
  "step": 122
29
  },
30
  {
31
- "epoch": 1.0,
32
- "step": 122,
33
- "total_flos": 379208121808800.0,
34
- "train_loss": 0.39131999406658236,
35
- "train_runtime": 117.1148,
36
- "train_samples_per_second": 31.064,
37
- "train_steps_per_second": 1.042
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
  ],
40
  "logging_steps": 500,
41
- "max_steps": 122,
42
  "num_input_tokens_seen": 0,
43
- "num_train_epochs": 1,
44
  "save_steps": 500,
45
- "total_flos": 379208121808800.0,
46
  "train_batch_size": 30,
47
  "trial_name": null,
48
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
  "eval_steps": 500,
6
+ "global_step": 2440,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.053804397583008,
14
+ "learning_rate": 4.75e-05,
15
+ "loss": 0.4355,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8696741854636592,
21
+ "eval_f1": 0.835906358747232,
22
+ "eval_loss": 0.3243214786052704,
23
+ "eval_precision": 0.853844109243139,
24
+ "eval_recall": 0.8227859610838335,
25
+ "eval_runtime": 4.9832,
26
+ "eval_samples_per_second": 80.069,
27
+ "eval_steps_per_second": 10.034,
28
  "step": 122
29
  },
30
  {
31
+ "epoch": 2.0,
32
+ "grad_norm": 23.150257110595703,
33
+ "learning_rate": 4.5e-05,
34
+ "loss": 0.2295,
35
+ "step": 244
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.8897243107769424,
40
+ "eval_f1": 0.8701248742380304,
41
+ "eval_loss": 0.3046626150608063,
42
+ "eval_precision": 0.8624507874015748,
43
+ "eval_recall": 0.8794780869248955,
44
+ "eval_runtime": 4.975,
45
+ "eval_samples_per_second": 80.2,
46
+ "eval_steps_per_second": 10.05,
47
+ "step": 244
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 0.44390636682510376,
52
+ "learning_rate": 4.25e-05,
53
+ "loss": 0.1337,
54
+ "step": 366
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.899749373433584,
59
+ "eval_f1": 0.879667048676036,
60
+ "eval_loss": 0.3747338354587555,
61
+ "eval_precision": 0.8778361344537815,
62
+ "eval_recall": 0.8815693762502272,
63
+ "eval_runtime": 4.9774,
64
+ "eval_samples_per_second": 80.162,
65
+ "eval_steps_per_second": 10.045,
66
+ "step": 366
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "grad_norm": 15.949886322021484,
71
+ "learning_rate": 4e-05,
72
+ "loss": 0.1038,
73
+ "step": 488
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.8822055137844611,
78
+ "eval_f1": 0.8651222336500356,
79
+ "eval_loss": 0.41882890462875366,
80
+ "eval_precision": 0.8518339768339769,
81
+ "eval_recall": 0.8866612111292962,
82
+ "eval_runtime": 5.0099,
83
+ "eval_samples_per_second": 79.643,
84
+ "eval_steps_per_second": 9.98,
85
+ "step": 488
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "grad_norm": 3.7783217430114746,
90
+ "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.072,
92
+ "step": 610
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.8872180451127819,
97
+ "eval_f1": 0.8622036668943447,
98
+ "eval_loss": 0.6270534992218018,
99
+ "eval_precision": 0.8671602787456446,
100
+ "eval_recall": 0.8577014002545917,
101
+ "eval_runtime": 5.0204,
102
+ "eval_samples_per_second": 79.476,
103
+ "eval_steps_per_second": 9.959,
104
+ "step": 610
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "grad_norm": 44.57243347167969,
109
+ "learning_rate": 3.5e-05,
110
+ "loss": 0.0462,
111
+ "step": 732
112
+ },
113
+ {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.8897243107769424,
116
+ "eval_f1": 0.8695225637671682,
117
+ "eval_loss": 0.6129250526428223,
118
+ "eval_precision": 0.8631532846715328,
119
+ "eval_recall": 0.8769776322967813,
120
+ "eval_runtime": 4.9643,
121
+ "eval_samples_per_second": 80.375,
122
+ "eval_steps_per_second": 10.072,
123
+ "step": 732
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "grad_norm": 0.024074144661426544,
128
+ "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.0459,
130
+ "step": 854
131
+ },
132
+ {
133
+ "epoch": 7.0,
134
+ "eval_accuracy": 0.8897243107769424,
135
+ "eval_f1": 0.8649122807017544,
136
+ "eval_loss": 0.5890637636184692,
137
+ "eval_precision": 0.8710116366366366,
138
+ "eval_recall": 0.8594744498999818,
139
+ "eval_runtime": 4.9832,
140
+ "eval_samples_per_second": 80.069,
141
+ "eval_steps_per_second": 10.034,
142
+ "step": 854
143
+ },
144
+ {
145
+ "epoch": 8.0,
146
+ "grad_norm": 0.022918157279491425,
147
+ "learning_rate": 3e-05,
148
+ "loss": 0.0391,
149
+ "step": 976
150
+ },
151
+ {
152
+ "epoch": 8.0,
153
+ "eval_accuracy": 0.8872180451127819,
154
+ "eval_f1": 0.8680720368560659,
155
+ "eval_loss": 0.5972921252250671,
156
+ "eval_precision": 0.8587217615098657,
157
+ "eval_recall": 0.8802054919076197,
158
+ "eval_runtime": 5.0003,
159
+ "eval_samples_per_second": 79.795,
160
+ "eval_steps_per_second": 9.999,
161
+ "step": 976
162
+ },
163
+ {
164
+ "epoch": 9.0,
165
+ "grad_norm": 106.23094177246094,
166
+ "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.0307,
168
+ "step": 1098
169
+ },
170
+ {
171
+ "epoch": 9.0,
172
+ "eval_accuracy": 0.87468671679198,
173
+ "eval_f1": 0.8584865509022812,
174
+ "eval_loss": 0.7086873054504395,
175
+ "eval_precision": 0.8441043083900227,
176
+ "eval_recall": 0.8863429714493545,
177
+ "eval_runtime": 4.9859,
178
+ "eval_samples_per_second": 80.026,
179
+ "eval_steps_per_second": 10.028,
180
+ "step": 1098
181
+ },
182
+ {
183
+ "epoch": 10.0,
184
+ "grad_norm": 0.024997469037771225,
185
+ "learning_rate": 2.5e-05,
186
+ "loss": 0.0199,
187
+ "step": 1220
188
+ },
189
+ {
190
+ "epoch": 10.0,
191
+ "eval_accuracy": 0.8972431077694235,
192
+ "eval_f1": 0.8717112228173498,
193
+ "eval_loss": 0.7264124155044556,
194
+ "eval_precision": 0.8869295958279009,
195
+ "eval_recall": 0.8597926895799237,
196
+ "eval_runtime": 4.9651,
197
+ "eval_samples_per_second": 80.361,
198
+ "eval_steps_per_second": 10.07,
199
+ "step": 1220
200
+ },
201
+ {
202
+ "epoch": 11.0,
203
+ "grad_norm": 0.004392046481370926,
204
+ "learning_rate": 2.25e-05,
205
+ "loss": 0.0105,
206
+ "step": 1342
207
+ },
208
+ {
209
+ "epoch": 11.0,
210
+ "eval_accuracy": 0.8972431077694235,
211
+ "eval_f1": 0.8757339815412664,
212
+ "eval_loss": 0.6738360524177551,
213
+ "eval_precision": 0.8766906299500427,
214
+ "eval_recall": 0.8747954173486088,
215
+ "eval_runtime": 5.0179,
216
+ "eval_samples_per_second": 79.516,
217
+ "eval_steps_per_second": 9.964,
218
+ "step": 1342
219
+ },
220
+ {
221
+ "epoch": 12.0,
222
+ "grad_norm": 0.004026818089187145,
223
+ "learning_rate": 2e-05,
224
+ "loss": 0.0131,
225
+ "step": 1464
226
+ },
227
+ {
228
+ "epoch": 12.0,
229
+ "eval_accuracy": 0.899749373433584,
230
+ "eval_f1": 0.882467302933899,
231
+ "eval_loss": 0.7488105297088623,
232
+ "eval_precision": 0.8732988802756245,
233
+ "eval_recall": 0.8940716493907983,
234
+ "eval_runtime": 5.0007,
235
+ "eval_samples_per_second": 79.788,
236
+ "eval_steps_per_second": 9.999,
237
+ "step": 1464
238
+ },
239
+ {
240
+ "epoch": 13.0,
241
+ "grad_norm": 0.004543425515294075,
242
+ "learning_rate": 1.75e-05,
243
+ "loss": 0.0102,
244
+ "step": 1586
245
+ },
246
+ {
247
+ "epoch": 13.0,
248
+ "eval_accuracy": 0.8972431077694235,
249
+ "eval_f1": 0.8792560061999484,
250
+ "eval_loss": 0.7154756784439087,
251
+ "eval_precision": 0.8707622232472325,
252
+ "eval_recall": 0.889798145117294,
253
+ "eval_runtime": 5.0136,
254
+ "eval_samples_per_second": 79.584,
255
+ "eval_steps_per_second": 9.973,
256
+ "step": 1586
257
+ },
258
+ {
259
+ "epoch": 14.0,
260
+ "grad_norm": 0.0037931231781840324,
261
+ "learning_rate": 1.5e-05,
262
+ "loss": 0.0061,
263
+ "step": 1708
264
+ },
265
+ {
266
+ "epoch": 14.0,
267
+ "eval_accuracy": 0.9072681704260651,
268
+ "eval_f1": 0.8894993300948346,
269
+ "eval_loss": 0.7196279168128967,
270
+ "eval_precision": 0.8850535598035154,
271
+ "eval_recall": 0.8943898890707401,
272
+ "eval_runtime": 5.009,
273
+ "eval_samples_per_second": 79.657,
274
+ "eval_steps_per_second": 9.982,
275
+ "step": 1708
276
+ },
277
+ {
278
+ "epoch": 15.0,
279
+ "grad_norm": 0.0027608012314885855,
280
+ "learning_rate": 1.25e-05,
281
+ "loss": 0.0138,
282
+ "step": 1830
283
+ },
284
+ {
285
+ "epoch": 15.0,
286
+ "eval_accuracy": 0.9022556390977443,
287
+ "eval_f1": 0.884617951284618,
288
+ "eval_loss": 0.7618029713630676,
289
+ "eval_precision": 0.8772893772893773,
290
+ "eval_recall": 0.8933442444080741,
291
+ "eval_runtime": 5.0251,
292
+ "eval_samples_per_second": 79.401,
293
+ "eval_steps_per_second": 9.95,
294
+ "step": 1830
295
+ },
296
+ {
297
+ "epoch": 16.0,
298
+ "grad_norm": 0.0022813216783106327,
299
+ "learning_rate": 1e-05,
300
+ "loss": 0.0075,
301
+ "step": 1952
302
+ },
303
+ {
304
+ "epoch": 16.0,
305
+ "eval_accuracy": 0.9047619047619048,
306
+ "eval_f1": 0.8873149414352814,
307
+ "eval_loss": 0.7252941727638245,
308
+ "eval_precision": 0.8806277372262774,
309
+ "eval_recall": 0.8951172940534643,
310
+ "eval_runtime": 5.0138,
311
+ "eval_samples_per_second": 79.581,
312
+ "eval_steps_per_second": 9.973,
313
+ "step": 1952
314
+ },
315
+ {
316
+ "epoch": 17.0,
317
+ "grad_norm": 15.638340950012207,
318
+ "learning_rate": 7.5e-06,
319
+ "loss": 0.0063,
320
+ "step": 2074
321
+ },
322
+ {
323
+ "epoch": 17.0,
324
+ "eval_accuracy": 0.9022556390977443,
325
+ "eval_f1": 0.8840781602687784,
326
+ "eval_loss": 0.7560042142868042,
327
+ "eval_precision": 0.87816715542522,
328
+ "eval_recall": 0.89084378977996,
329
+ "eval_runtime": 5.0052,
330
+ "eval_samples_per_second": 79.718,
331
+ "eval_steps_per_second": 9.99,
332
+ "step": 2074
333
+ },
334
+ {
335
+ "epoch": 18.0,
336
+ "grad_norm": 0.002121408935636282,
337
+ "learning_rate": 5e-06,
338
+ "loss": 0.0066,
339
+ "step": 2196
340
+ },
341
+ {
342
+ "epoch": 18.0,
343
+ "eval_accuracy": 0.9022556390977443,
344
+ "eval_f1": 0.8856624319419237,
345
+ "eval_loss": 0.748332142829895,
346
+ "eval_precision": 0.8758364312267658,
347
+ "eval_recall": 0.8983451536643026,
348
+ "eval_runtime": 4.9788,
349
+ "eval_samples_per_second": 80.139,
350
+ "eval_steps_per_second": 10.043,
351
+ "step": 2196
352
+ },
353
+ {
354
+ "epoch": 19.0,
355
+ "grad_norm": 0.004570267163217068,
356
+ "learning_rate": 2.5e-06,
357
+ "loss": 0.0023,
358
+ "step": 2318
359
+ },
360
+ {
361
+ "epoch": 19.0,
362
+ "eval_accuracy": 0.9022556390977443,
363
+ "eval_f1": 0.884617951284618,
364
+ "eval_loss": 0.7535205483436584,
365
+ "eval_precision": 0.8772893772893773,
366
+ "eval_recall": 0.8933442444080741,
367
+ "eval_runtime": 5.0013,
368
+ "eval_samples_per_second": 79.779,
369
+ "eval_steps_per_second": 9.997,
370
+ "step": 2318
371
+ },
372
+ {
373
+ "epoch": 20.0,
374
+ "grad_norm": 0.0018295175395905972,
375
+ "learning_rate": 0.0,
376
+ "loss": 0.0021,
377
+ "step": 2440
378
+ },
379
+ {
380
+ "epoch": 20.0,
381
+ "eval_accuracy": 0.9047619047619048,
382
+ "eval_f1": 0.8878351186601172,
383
+ "eval_loss": 0.7535876035690308,
384
+ "eval_precision": 0.879776516905975,
385
+ "eval_recall": 0.8976177486815784,
386
+ "eval_runtime": 5.0187,
387
+ "eval_samples_per_second": 79.503,
388
+ "eval_steps_per_second": 9.963,
389
+ "step": 2440
390
+ },
391
+ {
392
+ "epoch": 20.0,
393
+ "step": 2440,
394
+ "total_flos": 7584162436176000.0,
395
+ "train_loss": 0.06173487283655855,
396
+ "train_runtime": 2765.1299,
397
+ "train_samples_per_second": 26.313,
398
+ "train_steps_per_second": 0.882
399
  }
400
  ],
401
  "logging_steps": 500,
402
+ "max_steps": 2440,
403
  "num_input_tokens_seen": 0,
404
+ "num_train_epochs": 20,
405
  "save_steps": 500,
406
+ "total_flos": 7584162436176000.0,
407
  "train_batch_size": 30,
408
  "trial_name": null,
409
  "trial_params": null