apwic commited on
Commit
f835a20
1 Parent(s): 087cdcf

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "accuracy": 0.904055390702275,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8922305764411027,
5
- "eval_f1": 0.8696722245432793,
6
- "eval_loss": 0.29425162076950073,
7
- "eval_precision": 0.8706135006701596,
8
- "eval_recall": 0.8687488634297145,
9
- "eval_runtime": 5.0818,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 78.516,
12
- "eval_steps_per_second": 9.839,
13
- "f1": 0.8862491460015474,
14
- "precision": 0.881173503483252,
15
- "recall": 0.8919373664542485,
16
- "train_loss": 0.2198484170632284,
17
- "train_runtime": 1952.2473,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 37.27,
20
- "train_steps_per_second": 1.25
21
  }
 
1
  {
2
  "accuracy": 0.904055390702275,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8872180451127819,
5
+ "eval_f1": 0.8656072933585827,
6
+ "eval_loss": 0.30540019273757935,
7
+ "eval_precision": 0.8614399005740664,
8
+ "eval_recall": 0.8702036733951628,
9
+ "eval_runtime": 1.8192,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 219.323,
12
+ "eval_steps_per_second": 27.484,
13
+ "f1": 0.8864593379544456,
14
+ "precision": 0.8807881773399014,
15
+ "recall": 0.892913956531152,
16
+ "train_loss": 0.21972156196344095,
17
+ "train_runtime": 635.9425,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 114.413,
20
+ "train_steps_per_second": 3.837
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8922305764411027,
4
- "eval_f1": 0.8696722245432793,
5
- "eval_loss": 0.29425162076950073,
6
- "eval_precision": 0.8706135006701596,
7
- "eval_recall": 0.8687488634297145,
8
- "eval_runtime": 5.0818,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 78.516,
11
- "eval_steps_per_second": 9.839
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8872180451127819,
4
+ "eval_f1": 0.8656072933585827,
5
+ "eval_loss": 0.30540019273757935,
6
+ "eval_precision": 0.8614399005740664,
7
+ "eval_recall": 0.8702036733951628,
8
+ "eval_runtime": 1.8192,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 219.323,
11
+ "eval_steps_per_second": 27.484
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "accuracy": 0.904055390702275,
3
- "f1": 0.8862491460015474,
4
- "precision": 0.881173503483252,
5
- "recall": 0.8919373664542485
6
  }
 
1
  {
2
  "accuracy": 0.904055390702275,
3
+ "f1": 0.8864593379544456,
4
+ "precision": 0.8807881773399014,
5
+ "recall": 0.892913956531152
6
  }
predict_results.txt CHANGED
@@ -5,7 +5,7 @@ index prediction
5
  3 1
6
  4 1
7
  5 1
8
- 6 0
9
  7 1
10
  8 0
11
  9 1
@@ -20,11 +20,11 @@ index prediction
20
  18 1
21
  19 1
22
  20 1
23
- 21 0
24
  22 1
25
  23 1
26
  24 1
27
- 25 0
28
  26 1
29
  27 1
30
  28 1
@@ -82,14 +82,14 @@ index prediction
82
  80 1
83
  81 1
84
  82 1
85
- 83 1
86
  84 1
87
  85 1
88
  86 1
89
  87 1
90
  88 1
91
  89 1
92
- 90 0
93
  91 1
94
  92 1
95
  93 1
@@ -105,7 +105,7 @@ index prediction
105
  103 1
106
  104 1
107
  105 0
108
- 106 1
109
  107 1
110
  108 1
111
  109 0
@@ -144,7 +144,7 @@ index prediction
144
  142 1
145
  143 1
146
  144 1
147
- 145 0
148
  146 1
149
  147 1
150
  148 1
@@ -157,7 +157,7 @@ index prediction
157
  155 1
158
  156 0
159
  157 1
160
- 158 1
161
  159 1
162
  160 1
163
  161 0
@@ -217,7 +217,7 @@ index prediction
217
  215 0
218
  216 1
219
  217 0
220
- 218 1
221
  219 1
222
  220 1
223
  221 1
@@ -232,7 +232,7 @@ index prediction
232
  230 0
233
  231 1
234
  232 1
235
- 233 1
236
  234 1
237
  235 1
238
  236 1
@@ -247,7 +247,7 @@ index prediction
247
  245 1
248
  246 1
249
  247 1
250
- 248 0
251
  249 1
252
  250 1
253
  251 1
@@ -334,14 +334,14 @@ index prediction
334
  332 0
335
  333 0
336
  334 0
337
- 335 0
338
  336 0
339
  337 0
340
  338 0
341
  339 1
342
  340 0
343
  341 0
344
- 342 1
345
  343 0
346
  344 0
347
  345 0
@@ -365,10 +365,10 @@ index prediction
365
  363 0
366
  364 0
367
  365 0
368
- 366 0
369
  367 0
370
  368 0
371
- 369 0
372
  370 0
373
  371 0
374
  372 1
@@ -419,9 +419,9 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 0
423
  421 0
424
- 422 1
425
  423 0
426
  424 0
427
  425 0
@@ -463,7 +463,7 @@ index prediction
463
  461 0
464
  462 0
465
  463 0
466
- 464 1
467
  465 0
468
  466 0
469
  467 0
@@ -595,7 +595,7 @@ index prediction
595
  593 0
596
  594 0
597
  595 0
598
- 596 1
599
  597 1
600
  598 0
601
  599 0
@@ -617,11 +617,11 @@ index prediction
617
  615 0
618
  616 0
619
  617 0
620
- 618 1
621
  619 0
622
  620 0
623
  621 0
624
- 622 0
625
  623 0
626
  624 0
627
  625 0
@@ -667,12 +667,12 @@ index prediction
667
  665 0
668
  666 1
669
  667 0
670
- 668 1
671
  669 0
672
  670 0
673
  671 0
674
  672 0
675
- 673 1
676
  674 1
677
  675 0
678
  676 0
@@ -696,7 +696,7 @@ index prediction
696
  694 0
697
  695 0
698
  696 0
699
- 697 1
700
  698 0
701
  699 0
702
  700 0
@@ -725,9 +725,9 @@ index prediction
725
  723 0
726
  724 0
727
  725 0
728
- 726 1
729
  727 0
730
- 728 1
731
  729 0
732
  730 0
733
  731 0
@@ -798,7 +798,7 @@ index prediction
798
  796 0
799
  797 0
800
  798 1
801
- 799 1
802
  800 0
803
  801 0
804
  802 1
@@ -808,7 +808,7 @@ index prediction
808
  806 0
809
  807 0
810
  808 0
811
- 809 0
812
  810 1
813
  811 0
814
  812 0
@@ -816,7 +816,7 @@ index prediction
816
  814 0
817
  815 0
818
  816 0
819
- 817 0
820
  818 0
821
  819 0
822
  820 0
@@ -831,13 +831,13 @@ index prediction
831
  829 0
832
  830 0
833
  831 1
834
- 832 0
835
  833 0
836
  834 0
837
  835 0
838
  836 0
839
  837 0
840
- 838 1
841
  839 0
842
  840 0
843
  841 0
@@ -859,13 +859,13 @@ index prediction
859
  857 0
860
  858 0
861
  859 0
862
- 860 1
863
  861 0
864
  862 0
865
  863 0
866
  864 0
867
  865 0
868
- 866 1
869
  867 0
870
  868 0
871
  869 0
@@ -885,7 +885,7 @@ index prediction
885
  883 0
886
  884 0
887
  885 0
888
- 886 0
889
  887 0
890
  888 0
891
  889 0
@@ -911,8 +911,8 @@ index prediction
911
  909 0
912
  910 0
913
  911 0
914
- 912 0
915
- 913 0
916
  914 0
917
  915 0
918
  916 0
@@ -923,10 +923,10 @@ index prediction
923
  921 1
924
  922 0
925
  923 0
926
- 924 0
927
  925 0
928
  926 1
929
- 927 0
930
  928 0
931
  929 0
932
  930 0
@@ -944,7 +944,7 @@ index prediction
944
  942 0
945
  943 0
946
  944 0
947
- 945 0
948
  946 0
949
  947 1
950
  948 0
@@ -966,7 +966,7 @@ index prediction
966
  964 0
967
  965 0
968
  966 0
969
- 967 0
970
  968 0
971
  969 0
972
  970 0
 
5
  3 1
6
  4 1
7
  5 1
8
+ 6 1
9
  7 1
10
  8 0
11
  9 1
 
20
  18 1
21
  19 1
22
  20 1
23
+ 21 1
24
  22 1
25
  23 1
26
  24 1
27
+ 25 1
28
  26 1
29
  27 1
30
  28 1
 
82
  80 1
83
  81 1
84
  82 1
85
+ 83 0
86
  84 1
87
  85 1
88
  86 1
89
  87 1
90
  88 1
91
  89 1
92
+ 90 1
93
  91 1
94
  92 1
95
  93 1
 
105
  103 1
106
  104 1
107
  105 0
108
+ 106 0
109
  107 1
110
  108 1
111
  109 0
 
144
  142 1
145
  143 1
146
  144 1
147
+ 145 1
148
  146 1
149
  147 1
150
  148 1
 
157
  155 1
158
  156 0
159
  157 1
160
+ 158 0
161
  159 1
162
  160 1
163
  161 0
 
217
  215 0
218
  216 1
219
  217 0
220
+ 218 0
221
  219 1
222
  220 1
223
  221 1
 
232
  230 0
233
  231 1
234
  232 1
235
+ 233 0
236
  234 1
237
  235 1
238
  236 1
 
247
  245 1
248
  246 1
249
  247 1
250
+ 248 1
251
  249 1
252
  250 1
253
  251 1
 
334
  332 0
335
  333 0
336
  334 0
337
+ 335 1
338
  336 0
339
  337 0
340
  338 0
341
  339 1
342
  340 0
343
  341 0
344
+ 342 0
345
  343 0
346
  344 0
347
  345 0
 
365
  363 0
366
  364 0
367
  365 0
368
+ 366 1
369
  367 0
370
  368 0
371
+ 369 1
372
  370 0
373
  371 0
374
  372 1
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 1
423
  421 0
424
+ 422 0
425
  423 0
426
  424 0
427
  425 0
 
463
  461 0
464
  462 0
465
  463 0
466
+ 464 0
467
  465 0
468
  466 0
469
  467 0
 
595
  593 0
596
  594 0
597
  595 0
598
+ 596 0
599
  597 1
600
  598 0
601
  599 0
 
617
  615 0
618
  616 0
619
  617 0
620
+ 618 0
621
  619 0
622
  620 0
623
  621 0
624
+ 622 1
625
  623 0
626
  624 0
627
  625 0
 
667
  665 0
668
  666 1
669
  667 0
670
+ 668 0
671
  669 0
672
  670 0
673
  671 0
674
  672 0
675
+ 673 0
676
  674 1
677
  675 0
678
  676 0
 
696
  694 0
697
  695 0
698
  696 0
699
+ 697 0
700
  698 0
701
  699 0
702
  700 0
 
725
  723 0
726
  724 0
727
  725 0
728
+ 726 0
729
  727 0
730
+ 728 0
731
  729 0
732
  730 0
733
  731 0
 
798
  796 0
799
  797 0
800
  798 1
801
+ 799 0
802
  800 0
803
  801 0
804
  802 1
 
808
  806 0
809
  807 0
810
  808 0
811
+ 809 1
812
  810 1
813
  811 0
814
  812 0
 
816
  814 0
817
  815 0
818
  816 0
819
+ 817 1
820
  818 0
821
  819 0
822
  820 0
 
831
  829 0
832
  830 0
833
  831 1
834
+ 832 1
835
  833 0
836
  834 0
837
  835 0
838
  836 0
839
  837 0
840
+ 838 0
841
  839 0
842
  840 0
843
  841 0
 
859
  857 0
860
  858 0
861
  859 0
862
+ 860 0
863
  861 0
864
  862 0
865
  863 0
866
  864 0
867
  865 0
868
+ 866 0
869
  867 0
870
  868 0
871
  869 0
 
885
  883 0
886
  884 0
887
  885 0
888
+ 886 1
889
  887 0
890
  888 0
891
  889 0
 
911
  909 0
912
  910 0
913
  911 0
914
+ 912 1
915
+ 913 1
916
  914 0
917
  915 0
918
  916 0
 
923
  921 1
924
  922 0
925
  923 0
926
+ 924 1
927
  925 0
928
  926 1
929
+ 927 1
930
  928 0
931
  929 0
932
  930 0
 
944
  942 0
945
  943 0
946
  944 0
947
+ 945 1
948
  946 0
949
  947 1
950
  948 0
 
966
  964 0
967
  965 0
968
  966 0
969
+ 967 1
970
  968 0
971
  969 0
972
  970 0
runs/Jun03_14-50-36_a358b85c7679/events.out.tfevents.1717426889.a358b85c7679.152585.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2aec453ed3bd2226bb57186fd506344302a5388d15a1e7aa2c05df1e09c834
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.2198484170632284,
4
- "train_runtime": 1952.2473,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 37.27,
7
- "train_steps_per_second": 1.25
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.21972156196344095,
4
+ "train_runtime": 635.9425,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 114.413,
7
+ "train_steps_per_second": 3.837
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.1507415771484375,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5452,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
  "eval_accuracy": 0.7468671679197995,
21
- "eval_f1": 0.6572777139941319,
22
- "eval_loss": 0.4919416010379791,
23
- "eval_precision": 0.6922477833692786,
24
- "eval_recall": 0.6458901618476087,
25
- "eval_runtime": 5.1705,
26
- "eval_samples_per_second": 77.169,
27
- "eval_steps_per_second": 9.67,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.406769037246704,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4299,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8070175438596491,
40
- "eval_f1": 0.7892003156197468,
41
- "eval_loss": 0.4071064293384552,
42
- "eval_precision": 0.7801980454004861,
43
- "eval_recall": 0.8284688125113657,
44
- "eval_runtime": 5.112,
45
- "eval_samples_per_second": 78.052,
46
- "eval_steps_per_second": 9.781,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 4.251235485076904,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3291,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8671679197994987,
59
- "eval_f1": 0.8385441718775052,
60
- "eval_loss": 0.3090916574001312,
61
- "eval_precision": 0.8412280701754387,
62
- "eval_recall": 0.8360156392071285,
63
- "eval_runtime": 5.2309,
64
- "eval_samples_per_second": 76.278,
65
- "eval_steps_per_second": 9.559,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 2.2053372859954834,
71
  "learning_rate": 4e-05,
72
- "loss": 0.2887,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8521303258145363,
78
- "eval_f1": 0.8193336965948074,
79
- "eval_loss": 0.303335577249527,
80
- "eval_precision": 0.8237151567944251,
81
- "eval_recall": 0.8153755228223314,
82
- "eval_runtime": 5.1617,
83
- "eval_samples_per_second": 77.3,
84
- "eval_steps_per_second": 9.687,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 4.25098991394043,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2579,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8646616541353384,
97
- "eval_f1": 0.839868600986979,
98
- "eval_loss": 0.28800028562545776,
99
- "eval_precision": 0.834029197080292,
100
- "eval_recall": 0.8467448627023095,
101
- "eval_runtime": 5.0803,
102
- "eval_samples_per_second": 78.539,
103
- "eval_steps_per_second": 9.842,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 4.713165283203125,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.232,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.87468671679198,
116
- "eval_f1": 0.8537390029325513,
117
- "eval_loss": 0.29194238781929016,
118
- "eval_precision": 0.8442805058676086,
119
- "eval_recall": 0.8663393344244408,
120
- "eval_runtime": 5.1321,
121
- "eval_samples_per_second": 77.746,
122
- "eval_steps_per_second": 9.743,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.4289064109325409,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2181,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8822055137844611,
135
- "eval_f1": 0.858259325044405,
136
- "eval_loss": 0.27966198325157166,
137
- "eval_precision": 0.8573798178418481,
138
- "eval_recall": 0.8591562102200401,
139
- "eval_runtime": 5.1216,
140
- "eval_samples_per_second": 77.905,
141
- "eval_steps_per_second": 9.762,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 3.9783072471618652,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2114,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.87468671679198,
154
- "eval_f1": 0.8488361520276414,
155
- "eval_loss": 0.28283119201660156,
156
- "eval_precision": 0.8488361520276414,
157
- "eval_recall": 0.8488361520276414,
158
- "eval_runtime": 5.152,
159
- "eval_samples_per_second": 77.446,
160
- "eval_steps_per_second": 9.705,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 5.3162617683410645,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.199,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8796992481203008,
173
- "eval_f1": 0.8569892473118279,
174
- "eval_loss": 0.28352341055870056,
175
- "eval_precision": 0.8522004241781549,
176
- "eval_recall": 0.8623840698308783,
177
- "eval_runtime": 5.133,
178
- "eval_samples_per_second": 77.732,
179
- "eval_steps_per_second": 9.741,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 3.3976027965545654,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.189,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8771929824561403,
192
- "eval_f1": 0.8499551039516197,
193
- "eval_loss": 0.28156498074531555,
194
- "eval_precision": 0.8547473867595818,
195
- "eval_recall": 0.8456082924168031,
196
- "eval_runtime": 5.1772,
197
- "eval_samples_per_second": 77.068,
198
- "eval_steps_per_second": 9.658,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 4.37730073928833,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1738,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8822055137844611,
211
- "eval_f1": 0.858259325044405,
212
- "eval_loss": 0.290470689535141,
213
- "eval_precision": 0.8573798178418481,
214
- "eval_recall": 0.8591562102200401,
215
- "eval_runtime": 5.174,
216
- "eval_samples_per_second": 77.116,
217
- "eval_steps_per_second": 9.664,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 4.796699047088623,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1688,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8822055137844611,
230
- "eval_f1": 0.852937255424767,
231
- "eval_loss": 0.3152221739292145,
232
- "eval_precision": 0.8674217731421121,
233
- "eval_recall": 0.8416530278232406,
234
- "eval_runtime": 5.1474,
235
- "eval_samples_per_second": 77.514,
236
- "eval_steps_per_second": 9.714,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 3.7500946521759033,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1655,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
  "eval_accuracy": 0.8696741854636592,
249
  "eval_f1": 0.8411818738518064,
250
- "eval_loss": 0.2901403605937958,
251
  "eval_precision": 0.8448542607834644,
252
  "eval_recall": 0.8377886888525186,
253
- "eval_runtime": 5.147,
254
- "eval_samples_per_second": 77.521,
255
- "eval_steps_per_second": 9.714,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 3.2664284706115723,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1467,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8796992481203008,
268
- "eval_f1": 0.8576609786550924,
269
- "eval_loss": 0.29550954699516296,
270
- "eval_precision": 0.8515036496350366,
271
- "eval_recall": 0.8648845244589926,
272
- "eval_runtime": 5.17,
273
- "eval_samples_per_second": 77.177,
274
- "eval_steps_per_second": 9.671,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 2.9612016677856445,
280
  "learning_rate": 1.25e-05,
281
  "loss": 0.1442,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8822055137844611,
287
- "eval_f1": 0.8589543987905864,
288
- "eval_loss": 0.2865639328956604,
289
- "eval_precision": 0.8564068100358423,
290
- "eval_recall": 0.8616566648481543,
291
- "eval_runtime": 5.1432,
292
- "eval_samples_per_second": 77.578,
293
- "eval_steps_per_second": 9.722,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 1.4573726654052734,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1419,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8847117794486216,
306
- "eval_f1": 0.8616171059774413,
307
- "eval_loss": 0.2902199625968933,
308
- "eval_precision": 0.859873949579832,
309
- "eval_recall": 0.8634297144935443,
310
- "eval_runtime": 5.1161,
311
- "eval_samples_per_second": 77.99,
312
- "eval_steps_per_second": 9.773,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.22904033958911896,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1416,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8897243107769424,
325
- "eval_f1": 0.8676337535436396,
326
- "eval_loss": 0.28978079557418823,
327
- "eval_precision": 0.8658613445378152,
328
- "eval_recall": 0.8694762684124386,
329
- "eval_runtime": 5.0704,
330
- "eval_samples_per_second": 78.692,
331
- "eval_steps_per_second": 9.861,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 2.0896894931793213,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1389,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8872180451127819,
344
- "eval_f1": 0.8629148629148629,
345
- "eval_loss": 0.2956070601940155,
346
- "eval_precision": 0.8657894736842104,
347
- "eval_recall": 0.860201854882706,
348
- "eval_runtime": 5.0999,
349
- "eval_samples_per_second": 78.237,
350
- "eval_steps_per_second": 9.804,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.2827831506729126,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1401,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8922305764411027,
363
- "eval_f1": 0.8696722245432793,
364
- "eval_loss": 0.2937273681163788,
365
- "eval_precision": 0.8706135006701596,
366
- "eval_recall": 0.8687488634297145,
367
- "eval_runtime": 5.1361,
368
- "eval_samples_per_second": 77.686,
369
- "eval_steps_per_second": 9.735,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 4.219526290893555,
375
  "learning_rate": 0.0,
376
- "loss": 0.1348,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8922305764411027,
382
- "eval_f1": 0.8696722245432793,
383
- "eval_loss": 0.29425162076950073,
384
- "eval_precision": 0.8706135006701596,
385
- "eval_recall": 0.8687488634297145,
386
- "eval_runtime": 5.1486,
387
- "eval_samples_per_second": 77.497,
388
- "eval_steps_per_second": 9.711,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
- "train_loss": 0.2198484170632284,
396
- "train_runtime": 1952.2473,
397
- "train_samples_per_second": 37.27,
398
- "train_steps_per_second": 1.25
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.9823102951049805,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5456,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
  "eval_accuracy": 0.7468671679197995,
21
+ "eval_f1": 0.6641666041651041,
22
+ "eval_loss": 0.49161723256111145,
23
+ "eval_precision": 0.6912698412698413,
24
+ "eval_recall": 0.6533915257319513,
25
+ "eval_runtime": 1.7896,
26
+ "eval_samples_per_second": 222.952,
27
+ "eval_steps_per_second": 27.939,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 2.931857109069824,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4369,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8120300751879699,
40
+ "eval_f1": 0.7912087912087912,
41
+ "eval_loss": 0.41080209612846375,
42
+ "eval_precision": 0.7802706552706553,
43
+ "eval_recall": 0.822013093289689,
44
+ "eval_runtime": 1.8221,
45
+ "eval_samples_per_second": 218.982,
46
+ "eval_steps_per_second": 27.441,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 3.00280499458313,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3316,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8571428571428571,
59
+ "eval_f1": 0.81524926686217,
60
+ "eval_loss": 0.3294452726840973,
61
+ "eval_precision": 0.8462682339611953,
62
+ "eval_recall": 0.7964175304600837,
63
+ "eval_runtime": 1.7904,
64
+ "eval_samples_per_second": 222.854,
65
+ "eval_steps_per_second": 27.927,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 3.0830881595611572,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.2909,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8771929824561403,
78
+ "eval_f1": 0.8499551039516197,
79
+ "eval_loss": 0.30187976360321045,
80
+ "eval_precision": 0.8547473867595818,
81
+ "eval_recall": 0.8456082924168031,
82
+ "eval_runtime": 1.8,
83
+ "eval_samples_per_second": 221.672,
84
+ "eval_steps_per_second": 27.778,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 2.5169291496276855,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2584,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8696741854636592,
97
+ "eval_f1": 0.842789598108747,
98
+ "eval_loss": 0.3023463487625122,
99
+ "eval_precision": 0.842789598108747,
100
+ "eval_recall": 0.842789598108747,
101
+ "eval_runtime": 1.7982,
102
+ "eval_samples_per_second": 221.888,
103
+ "eval_steps_per_second": 27.805,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 1.894607424736023,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.237,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8646616541353384,
116
+ "eval_f1": 0.8375505157126486,
117
+ "eval_loss": 0.30203157663345337,
118
+ "eval_precision": 0.8359243697478991,
119
+ "eval_recall": 0.8392434988179669,
120
+ "eval_runtime": 1.7962,
121
+ "eval_samples_per_second": 222.138,
122
+ "eval_steps_per_second": 27.837,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.33639559149742126,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2186,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8721804511278195,
135
+ "eval_f1": 0.8491157824491158,
136
+ "eval_loss": 0.2988818287849426,
137
+ "eval_precision": 0.8424908424908425,
138
+ "eval_recall": 0.8570649208947081,
139
+ "eval_runtime": 1.8005,
140
+ "eval_samples_per_second": 221.607,
141
+ "eval_steps_per_second": 27.77,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 1.405568242073059,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2108,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8872180451127819,
154
+ "eval_f1": 0.8614765038536611,
155
+ "eval_loss": 0.29612118005752563,
156
+ "eval_precision": 0.8686536646744258,
157
+ "eval_recall": 0.8552009456264775,
158
+ "eval_runtime": 1.8282,
159
+ "eval_samples_per_second": 218.244,
160
+ "eval_steps_per_second": 27.349,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 6.2904157638549805,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.1898,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.87468671679198,
173
+ "eval_f1": 0.8480717680029244,
174
+ "eval_loss": 0.30133265256881714,
175
+ "eval_precision": 0.8498775260257195,
176
+ "eval_recall": 0.8463356973995272,
177
+ "eval_runtime": 1.7951,
178
+ "eval_samples_per_second": 222.272,
179
+ "eval_steps_per_second": 27.854,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 3.820188522338867,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.1894,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.87468671679198,
192
+ "eval_f1": 0.8456742372671576,
193
+ "eval_loss": 0.32312536239624023,
194
+ "eval_precision": 0.8536697247706422,
195
+ "eval_recall": 0.8388343335151845,
196
+ "eval_runtime": 1.8085,
197
+ "eval_samples_per_second": 220.63,
198
+ "eval_steps_per_second": 27.648,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 1.6913771629333496,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1817,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8771929824561403,
211
+ "eval_f1": 0.8514869535493182,
212
+ "eval_loss": 0.301199346780777,
213
+ "eval_precision": 0.8523821128305106,
214
+ "eval_recall": 0.8506092016730314,
215
+ "eval_runtime": 1.8217,
216
+ "eval_samples_per_second": 219.025,
217
+ "eval_steps_per_second": 27.447,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 5.912967681884766,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1723,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8646616541353384,
230
+ "eval_f1": 0.8359175094431583,
231
+ "eval_loss": 0.29794374108314514,
232
+ "eval_precision": 0.8376607470912432,
233
+ "eval_recall": 0.8342425895617385,
234
+ "eval_runtime": 1.7955,
235
+ "eval_samples_per_second": 222.218,
236
+ "eval_steps_per_second": 27.847,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.4766283929347992,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1547,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
  "eval_accuracy": 0.8696741854636592,
249
  "eval_f1": 0.8411818738518064,
250
+ "eval_loss": 0.2937452793121338,
251
  "eval_precision": 0.8448542607834644,
252
  "eval_recall": 0.8377886888525186,
253
+ "eval_runtime": 1.8242,
254
+ "eval_samples_per_second": 218.73,
255
+ "eval_steps_per_second": 27.41,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 4.687190532684326,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1569,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8696741854636592,
268
+ "eval_f1": 0.8485547445255475,
269
+ "eval_loss": 0.3064776360988617,
270
+ "eval_precision": 0.8383458646616542,
271
+ "eval_recall": 0.8627932351336607,
272
+ "eval_runtime": 1.7921,
273
+ "eval_samples_per_second": 222.641,
274
+ "eval_steps_per_second": 27.9,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 4.828568458557129,
280
  "learning_rate": 1.25e-05,
281
  "loss": 0.1442,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8771929824561403,
287
+ "eval_f1": 0.8529524583135901,
288
+ "eval_loss": 0.2884095013141632,
289
+ "eval_precision": 0.8504480286738352,
290
+ "eval_recall": 0.8556101109292599,
291
+ "eval_runtime": 1.8008,
292
+ "eval_samples_per_second": 221.563,
293
+ "eval_steps_per_second": 27.765,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 3.8095035552978516,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1435,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8796992481203008,
306
+ "eval_f1": 0.8563025210084034,
307
+ "eval_loss": 0.30156683921813965,
308
+ "eval_precision": 0.8529936381473334,
309
+ "eval_recall": 0.8598836152027641,
310
+ "eval_runtime": 1.8479,
311
+ "eval_samples_per_second": 215.919,
312
+ "eval_steps_per_second": 27.058,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.34168025851249695,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1378,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8771929824561403,
325
+ "eval_f1": 0.8556962025316456,
326
+ "eval_loss": 0.31144392490386963,
327
+ "eval_precision": 0.8477571494464944,
328
+ "eval_recall": 0.8656119294417166,
329
+ "eval_runtime": 1.8294,
330
+ "eval_samples_per_second": 218.106,
331
+ "eval_steps_per_second": 27.332,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 0.2586296498775482,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1377,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8771929824561403,
344
+ "eval_f1": 0.8550328105883662,
345
+ "eval_loss": 0.3096340000629425,
346
+ "eval_precision": 0.8482905982905984,
347
+ "eval_recall": 0.8631114748136025,
348
+ "eval_runtime": 1.8332,
349
+ "eval_samples_per_second": 217.658,
350
+ "eval_steps_per_second": 27.275,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.0663389191031456,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1307,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8796992481203008,
363
+ "eval_f1": 0.8556004584112431,
364
+ "eval_loss": 0.30646833777427673,
365
+ "eval_precision": 0.8538865546218487,
366
+ "eval_recall": 0.85738316057465,
367
+ "eval_runtime": 1.8307,
368
+ "eval_samples_per_second": 217.954,
369
+ "eval_steps_per_second": 27.313,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.8260014057159424,
375
  "learning_rate": 0.0,
376
+ "loss": 0.126,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8872180451127819,
382
+ "eval_f1": 0.8656072933585827,
383
+ "eval_loss": 0.30540019273757935,
384
+ "eval_precision": 0.8614399005740664,
385
+ "eval_recall": 0.8702036733951628,
386
+ "eval_runtime": 1.8294,
387
+ "eval_samples_per_second": 218.099,
388
+ "eval_steps_per_second": 27.331,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
+ "train_loss": 0.21972156196344095,
396
+ "train_runtime": 635.9425,
397
+ "train_samples_per_second": 114.413,
398
+ "train_steps_per_second": 3.837
399
  }
400
  ],
401
  "logging_steps": 500,