apwic commited on
Commit
b35e994
1 Parent(s): d3783a8

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9010880316518298,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8822055137844611,
5
- "eval_f1": 0.8589543987905864,
6
- "eval_loss": 0.32526540756225586,
7
- "eval_precision": 0.8564068100358423,
8
- "eval_recall": 0.8616566648481543,
9
- "eval_runtime": 5.0035,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 79.744,
12
- "eval_steps_per_second": 9.993,
13
- "f1": 0.8826220220127247,
14
- "precision": 0.8778956201166501,
15
- "recall": 0.8878803994841722,
16
- "train_loss": 0.21670899273919278,
17
- "train_runtime": 1925.8785,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 37.78,
20
- "train_steps_per_second": 1.267
21
  }
 
1
  {
2
+ "accuracy": 0.9090009891196835,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8847117794486216,
5
+ "eval_f1": 0.8609292598654301,
6
+ "eval_loss": 0.29736196994781494,
7
+ "eval_precision": 0.8609292598654301,
8
+ "eval_recall": 0.8609292598654301,
9
+ "eval_runtime": 1.8087,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 220.599,
12
+ "eval_steps_per_second": 27.644,
13
+ "f1": 0.8922124878320122,
14
+ "precision": 0.8867530544919537,
15
+ "recall": 0.8983734480454079,
16
+ "train_loss": 0.23053875125822473,
17
+ "train_runtime": 628.042,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 115.852,
20
+ "train_steps_per_second": 3.885
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8822055137844611,
4
- "eval_f1": 0.8589543987905864,
5
- "eval_loss": 0.32526540756225586,
6
- "eval_precision": 0.8564068100358423,
7
- "eval_recall": 0.8616566648481543,
8
- "eval_runtime": 5.0035,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 79.744,
11
- "eval_steps_per_second": 9.993
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8847117794486216,
4
+ "eval_f1": 0.8609292598654301,
5
+ "eval_loss": 0.29736196994781494,
6
+ "eval_precision": 0.8609292598654301,
7
+ "eval_recall": 0.8609292598654301,
8
+ "eval_runtime": 1.8087,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 220.599,
11
+ "eval_steps_per_second": 27.644
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9010880316518298,
3
- "f1": 0.8826220220127247,
4
- "precision": 0.8778956201166501,
5
- "recall": 0.8878803994841722
6
  }
 
1
  {
2
+ "accuracy": 0.9090009891196835,
3
+ "f1": 0.8922124878320122,
4
+ "precision": 0.8867530544919537,
5
+ "recall": 0.8983734480454079
6
  }
predict_results.txt CHANGED
@@ -5,7 +5,7 @@ index prediction
5
  3 1
6
  4 1
7
  5 1
8
- 6 0
9
  7 1
10
  8 0
11
  9 1
@@ -36,7 +36,7 @@ index prediction
36
  34 1
37
  35 1
38
  36 1
39
- 37 1
40
  38 1
41
  39 1
42
  40 1
@@ -75,21 +75,21 @@ index prediction
75
  73 1
76
  74 1
77
  75 1
78
- 76 1
79
  77 1
80
  78 0
81
  79 1
82
  80 1
83
  81 1
84
  82 1
85
- 83 0
86
  84 1
87
  85 1
88
  86 1
89
  87 1
90
  88 1
91
  89 1
92
- 90 1
93
  91 1
94
  92 1
95
  93 1
@@ -128,7 +128,7 @@ index prediction
128
  126 1
129
  127 1
130
  128 1
131
- 129 0
132
  130 1
133
  131 1
134
  132 1
@@ -144,13 +144,13 @@ index prediction
144
  142 1
145
  143 1
146
  144 1
147
- 145 0
148
  146 1
149
  147 1
150
- 148 0
151
  149 1
152
  150 1
153
- 151 0
154
  152 1
155
  153 1
156
  154 1
@@ -165,10 +165,10 @@ index prediction
165
  163 1
166
  164 1
167
  165 1
168
- 166 1
169
  167 0
170
  168 1
171
- 169 0
172
  170 1
173
  171 1
174
  172 1
@@ -186,7 +186,7 @@ index prediction
186
  184 1
187
  185 1
188
  186 1
189
- 187 1
190
  188 1
191
  189 0
192
  190 1
@@ -211,7 +211,7 @@ index prediction
211
  209 1
212
  210 1
213
  211 1
214
- 212 0
215
  213 1
216
  214 1
217
  215 0
@@ -224,7 +224,7 @@ index prediction
224
  222 1
225
  223 1
226
  224 1
227
- 225 0
228
  226 1
229
  227 0
230
  228 1
@@ -247,7 +247,7 @@ index prediction
247
  245 1
248
  246 1
249
  247 1
250
- 248 0
251
  249 1
252
  250 1
253
  251 1
@@ -267,7 +267,7 @@ index prediction
267
  265 1
268
  266 1
269
  267 1
270
- 268 1
271
  269 1
272
  270 1
273
  271 1
@@ -281,12 +281,12 @@ index prediction
281
  279 1
282
  280 1
283
  281 1
284
- 282 0
285
  283 1
286
- 284 0
287
  285 1
288
  286 1
289
- 287 1
290
  288 1
291
  289 0
292
  290 1
@@ -365,7 +365,7 @@ index prediction
365
  363 0
366
  364 0
367
  365 0
368
- 366 1
369
  367 0
370
  368 0
371
  369 0
@@ -385,7 +385,7 @@ index prediction
385
  383 0
386
  384 1
387
  385 0
388
- 386 0
389
  387 0
390
  388 0
391
  389 0
@@ -397,7 +397,7 @@ index prediction
397
  395 0
398
  396 0
399
  397 0
400
- 398 1
401
  399 0
402
  400 0
403
  401 0
@@ -419,7 +419,7 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 0
423
  421 0
424
  422 0
425
  423 0
@@ -441,7 +441,7 @@ index prediction
441
  439 0
442
  440 0
443
  441 0
444
- 442 1
445
  443 0
446
  444 0
447
  445 0
@@ -472,7 +472,7 @@ index prediction
472
  470 0
473
  471 0
474
  472 0
475
- 473 0
476
  474 1
477
  475 0
478
  476 1
@@ -566,7 +566,7 @@ index prediction
566
  564 0
567
  565 1
568
  566 0
569
- 567 0
570
  568 0
571
  569 0
572
  570 0
@@ -590,12 +590,12 @@ index prediction
590
  588 0
591
  589 0
592
  590 0
593
- 591 0
594
  592 0
595
  593 0
596
  594 0
597
  595 0
598
- 596 1
599
  597 1
600
  598 0
601
  599 0
@@ -617,7 +617,7 @@ index prediction
617
  615 0
618
  616 0
619
  617 0
620
- 618 1
621
  619 0
622
  620 0
623
  621 0
@@ -648,7 +648,7 @@ index prediction
648
  646 0
649
  647 0
650
  648 0
651
- 649 1
652
  650 0
653
  651 0
654
  652 0
@@ -672,8 +672,8 @@ index prediction
672
  670 0
673
  671 0
674
  672 0
675
- 673 1
676
- 674 0
677
  675 0
678
  676 0
679
  677 0
@@ -696,11 +696,11 @@ index prediction
696
  694 0
697
  695 0
698
  696 0
699
- 697 1
700
  698 0
701
  699 0
702
  700 0
703
- 701 1
704
  702 0
705
  703 0
706
  704 0
@@ -725,9 +725,9 @@ index prediction
725
  723 0
726
  724 0
727
  725 0
728
- 726 1
729
  727 0
730
- 728 1
731
  729 0
732
  730 0
733
  731 0
@@ -801,7 +801,7 @@ index prediction
801
  799 0
802
  800 0
803
  801 0
804
- 802 0
805
  803 0
806
  804 0
807
  805 0
@@ -827,7 +827,7 @@ index prediction
827
  825 0
828
  826 0
829
  827 0
830
- 828 1
831
  829 0
832
  830 0
833
  831 1
@@ -853,13 +853,13 @@ index prediction
853
  851 0
854
  852 0
855
  853 0
856
- 854 0
857
  855 0
858
  856 0
859
  857 0
860
  858 0
861
  859 0
862
- 860 0
863
  861 0
864
  862 0
865
  863 0
@@ -912,7 +912,7 @@ index prediction
912
  910 0
913
  911 0
914
  912 0
915
- 913 0
916
  914 0
917
  915 0
918
  916 0
@@ -946,7 +946,7 @@ index prediction
946
  944 0
947
  945 1
948
  946 0
949
- 947 0
950
  948 0
951
  949 0
952
  950 0
@@ -966,7 +966,7 @@ index prediction
966
  964 0
967
  965 0
968
  966 0
969
- 967 1
970
  968 0
971
  969 0
972
  970 0
@@ -1001,7 +1001,7 @@ index prediction
1001
  999 1
1002
  1000 0
1003
  1001 0
1004
- 1002 1
1005
  1003 0
1006
  1004 1
1007
  1005 0
 
5
  3 1
6
  4 1
7
  5 1
8
+ 6 1
9
  7 1
10
  8 0
11
  9 1
 
36
  34 1
37
  35 1
38
  36 1
39
+ 37 0
40
  38 1
41
  39 1
42
  40 1
 
75
  73 1
76
  74 1
77
  75 1
78
+ 76 0
79
  77 1
80
  78 0
81
  79 1
82
  80 1
83
  81 1
84
  82 1
85
+ 83 1
86
  84 1
87
  85 1
88
  86 1
89
  87 1
90
  88 1
91
  89 1
92
+ 90 0
93
  91 1
94
  92 1
95
  93 1
 
128
  126 1
129
  127 1
130
  128 1
131
+ 129 1
132
  130 1
133
  131 1
134
  132 1
 
144
  142 1
145
  143 1
146
  144 1
147
+ 145 1
148
  146 1
149
  147 1
150
+ 148 1
151
  149 1
152
  150 1
153
+ 151 1
154
  152 1
155
  153 1
156
  154 1
 
165
  163 1
166
  164 1
167
  165 1
168
+ 166 0
169
  167 0
170
  168 1
171
+ 169 1
172
  170 1
173
  171 1
174
  172 1
 
186
  184 1
187
  185 1
188
  186 1
189
+ 187 0
190
  188 1
191
  189 0
192
  190 1
 
211
  209 1
212
  210 1
213
  211 1
214
+ 212 1
215
  213 1
216
  214 1
217
  215 0
 
224
  222 1
225
  223 1
226
  224 1
227
+ 225 1
228
  226 1
229
  227 0
230
  228 1
 
247
  245 1
248
  246 1
249
  247 1
250
+ 248 1
251
  249 1
252
  250 1
253
  251 1
 
267
  265 1
268
  266 1
269
  267 1
270
+ 268 0
271
  269 1
272
  270 1
273
  271 1
 
281
  279 1
282
  280 1
283
  281 1
284
+ 282 1
285
  283 1
286
+ 284 1
287
  285 1
288
  286 1
289
+ 287 0
290
  288 1
291
  289 0
292
  290 1
 
365
  363 0
366
  364 0
367
  365 0
368
+ 366 0
369
  367 0
370
  368 0
371
  369 0
 
385
  383 0
386
  384 1
387
  385 0
388
+ 386 1
389
  387 0
390
  388 0
391
  389 0
 
397
  395 0
398
  396 0
399
  397 0
400
+ 398 0
401
  399 0
402
  400 0
403
  401 0
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 1
423
  421 0
424
  422 0
425
  423 0
 
441
  439 0
442
  440 0
443
  441 0
444
+ 442 0
445
  443 0
446
  444 0
447
  445 0
 
472
  470 0
473
  471 0
474
  472 0
475
+ 473 1
476
  474 1
477
  475 0
478
  476 1
 
566
  564 0
567
  565 1
568
  566 0
569
+ 567 1
570
  568 0
571
  569 0
572
  570 0
 
590
  588 0
591
  589 0
592
  590 0
593
+ 591 1
594
  592 0
595
  593 0
596
  594 0
597
  595 0
598
+ 596 0
599
  597 1
600
  598 0
601
  599 0
 
617
  615 0
618
  616 0
619
  617 0
620
+ 618 0
621
  619 0
622
  620 0
623
  621 0
 
648
  646 0
649
  647 0
650
  648 0
651
+ 649 0
652
  650 0
653
  651 0
654
  652 0
 
672
  670 0
673
  671 0
674
  672 0
675
+ 673 0
676
+ 674 1
677
  675 0
678
  676 0
679
  677 0
 
696
  694 0
697
  695 0
698
  696 0
699
+ 697 0
700
  698 0
701
  699 0
702
  700 0
703
+ 701 0
704
  702 0
705
  703 0
706
  704 0
 
725
  723 0
726
  724 0
727
  725 0
728
+ 726 0
729
  727 0
730
+ 728 0
731
  729 0
732
  730 0
733
  731 0
 
801
  799 0
802
  800 0
803
  801 0
804
+ 802 1
805
  803 0
806
  804 0
807
  805 0
 
827
  825 0
828
  826 0
829
  827 0
830
+ 828 0
831
  829 0
832
  830 0
833
  831 1
 
853
  851 0
854
  852 0
855
  853 0
856
+ 854 1
857
  855 0
858
  856 0
859
  857 0
860
  858 0
861
  859 0
862
+ 860 1
863
  861 0
864
  862 0
865
  863 0
 
912
  910 0
913
  911 0
914
  912 0
915
+ 913 1
916
  914 0
917
  915 0
918
  916 0
 
946
  944 0
947
  945 1
948
  946 0
949
+ 947 1
950
  948 0
951
  949 0
952
  950 0
 
966
  964 0
967
  965 0
968
  966 0
969
+ 967 0
970
  968 0
971
  969 0
972
  970 0
 
1001
  999 1
1002
  1000 0
1003
  1001 0
1004
+ 1002 0
1005
  1003 0
1006
  1004 1
1007
  1005 0
runs/Jun03_14-39-35_a358b85c7679/events.out.tfevents.1717426219.a358b85c7679.146701.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f777f3ea51e241fe7c17b7ea22ee042d0ba1bc84101b1e2eee6519c6d20376ed
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.21670899273919278,
4
- "train_runtime": 1925.8785,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 37.78,
7
- "train_steps_per_second": 1.267
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.23053875125822473,
4
+ "train_runtime": 628.042,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 115.852,
7
+ "train_steps_per_second": 3.885
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 6.052820205688477,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5445,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7518796992481203,
21
- "eval_f1": 0.6708167704192605,
22
- "eval_loss": 0.48936620354652405,
23
- "eval_precision": 0.6988095238095238,
24
- "eval_recall": 0.6594380796508457,
25
- "eval_runtime": 5.1391,
26
- "eval_samples_per_second": 77.64,
27
- "eval_steps_per_second": 9.729,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.3890905380249023,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.432,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8195488721804511,
40
- "eval_f1": 0.7954312407428505,
41
- "eval_loss": 0.36909204721450806,
42
- "eval_precision": 0.7842791202823785,
43
- "eval_recall": 0.8173304237134025,
44
- "eval_runtime": 5.0614,
45
- "eval_samples_per_second": 78.833,
46
- "eval_steps_per_second": 9.879,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 2.7538790702819824,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3342,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.849624060150376,
59
- "eval_f1": 0.7956919505700826,
60
- "eval_loss": 0.33010444045066833,
61
- "eval_precision": 0.85742518351214,
62
- "eval_recall": 0.7685942898708855,
63
- "eval_runtime": 5.0669,
64
- "eval_samples_per_second": 78.747,
65
- "eval_steps_per_second": 9.868,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 3.051776170730591,
71
  "learning_rate": 4e-05,
72
- "loss": 0.2846,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8796992481203008,
78
- "eval_f1": 0.8502252252252251,
79
- "eval_loss": 0.2885676324367523,
80
- "eval_precision": 0.863265306122449,
81
- "eval_recall": 0.8398799781778505,
82
- "eval_runtime": 5.0493,
83
- "eval_samples_per_second": 79.021,
84
- "eval_steps_per_second": 9.902,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.2031478881835938,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2621,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.87468671679198,
97
- "eval_f1": 0.8488361520276414,
98
- "eval_loss": 0.27282822132110596,
99
- "eval_precision": 0.8488361520276414,
100
- "eval_recall": 0.8488361520276414,
101
- "eval_runtime": 5.0626,
102
- "eval_samples_per_second": 78.814,
103
- "eval_steps_per_second": 9.876,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.391174554824829,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2419,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.87468671679198,
116
- "eval_f1": 0.8503151260504201,
117
- "eval_loss": 0.27526727318763733,
118
- "eval_precision": 0.8470628455912955,
119
- "eval_recall": 0.8538370612838698,
120
- "eval_runtime": 5.0607,
121
- "eval_samples_per_second": 78.843,
122
- "eval_steps_per_second": 9.88,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.6138697862625122,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2132,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.8847117794486216,
135
- "eval_f1": 0.8622899159663866,
136
- "eval_loss": 0.27531638741493225,
137
- "eval_precision": 0.8589244307033712,
138
- "eval_recall": 0.8659301691216585,
139
- "eval_runtime": 5.0703,
140
- "eval_samples_per_second": 78.693,
141
- "eval_steps_per_second": 9.861,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 1.8498824834823608,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2055,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
  "eval_accuracy": 0.8796992481203008,
154
- "eval_f1": 0.8541488972828073,
155
- "eval_loss": 0.27910253405570984,
156
- "eval_precision": 0.8559859154929578,
157
- "eval_recall": 0.8523822513184216,
158
- "eval_runtime": 5.0606,
159
- "eval_samples_per_second": 78.844,
160
- "eval_steps_per_second": 9.88,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 6.801692485809326,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.1903,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8621553884711779,
173
- "eval_f1": 0.8306935047100303,
174
- "eval_loss": 0.3009396195411682,
175
- "eval_precision": 0.8372758729160114,
176
- "eval_recall": 0.8249681760320058,
177
- "eval_runtime": 5.0892,
178
- "eval_samples_per_second": 78.402,
179
- "eval_steps_per_second": 9.825,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 5.017825126647949,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.1852,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8671679197994987,
192
- "eval_f1": 0.8341632880321839,
193
- "eval_loss": 0.3085351884365082,
194
- "eval_precision": 0.8479139504563233,
195
- "eval_recall": 0.8235133660665576,
196
- "eval_runtime": 5.0614,
197
- "eval_samples_per_second": 78.832,
198
- "eval_steps_per_second": 9.879,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 3.8085110187530518,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1758,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8796992481203008,
211
- "eval_f1": 0.8556004584112431,
212
- "eval_loss": 0.28515923023223877,
213
- "eval_precision": 0.8538865546218487,
214
- "eval_recall": 0.85738316057465,
215
- "eval_runtime": 5.053,
216
- "eval_samples_per_second": 78.964,
217
- "eval_steps_per_second": 9.895,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 4.22409725189209,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1617,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8872180451127819,
230
- "eval_f1": 0.8642908431276217,
231
- "eval_loss": 0.30107688903808594,
232
- "eval_precision": 0.8633964654080464,
233
- "eval_recall": 0.8652027641389344,
234
- "eval_runtime": 5.0864,
235
- "eval_samples_per_second": 78.444,
236
- "eval_steps_per_second": 9.83,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 4.442287921905518,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1581,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8922305764411027,
249
- "eval_f1": 0.8703223612108386,
250
- "eval_loss": 0.3050229847431183,
251
- "eval_precision": 0.8694131129742446,
252
- "eval_recall": 0.8712493180578287,
253
- "eval_runtime": 5.0713,
254
- "eval_samples_per_second": 78.678,
255
- "eval_steps_per_second": 9.859,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 3.8908846378326416,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.149,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8922305764411027,
268
- "eval_f1": 0.8744925055412909,
269
- "eval_loss": 0.3143095374107361,
270
- "eval_precision": 0.8639342866870956,
271
- "eval_recall": 0.8887525004546282,
272
- "eval_runtime": 5.0958,
273
- "eval_samples_per_second": 78.299,
274
- "eval_steps_per_second": 9.812,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 3.6980299949645996,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1386,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.899749373433584,
287
- "eval_f1": 0.8790689216221131,
288
- "eval_loss": 0.302778035402298,
289
- "eval_precision": 0.8790689216221131,
290
- "eval_recall": 0.8790689216221131,
291
- "eval_runtime": 5.0849,
292
- "eval_samples_per_second": 78.468,
293
- "eval_steps_per_second": 9.833,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 2.6725683212280273,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1465,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
  "eval_accuracy": 0.8922305764411027,
306
- "eval_f1": 0.8696722245432793,
307
- "eval_loss": 0.3111669719219208,
308
- "eval_precision": 0.8706135006701596,
309
- "eval_recall": 0.8687488634297145,
310
- "eval_runtime": 5.0592,
311
- "eval_samples_per_second": 78.866,
312
- "eval_steps_per_second": 9.883,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.7393283247947693,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1307,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8847117794486216,
325
- "eval_f1": 0.8629480286738351,
326
- "eval_loss": 0.31975144147872925,
327
- "eval_precision": 0.8580770590314599,
328
- "eval_recall": 0.8684306237497728,
329
- "eval_runtime": 5.0777,
330
- "eval_samples_per_second": 78.579,
331
- "eval_steps_per_second": 9.847,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 0.382880836725235,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1231,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8847117794486216,
344
- "eval_f1": 0.8609292598654301,
345
- "eval_loss": 0.32525473833084106,
346
- "eval_precision": 0.8609292598654301,
347
- "eval_recall": 0.8609292598654301,
348
- "eval_runtime": 5.096,
349
- "eval_samples_per_second": 78.296,
350
- "eval_steps_per_second": 9.812,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.1525256484746933,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1344,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8796992481203008,
363
- "eval_f1": 0.8541488972828073,
364
- "eval_loss": 0.3289809823036194,
365
- "eval_precision": 0.8559859154929578,
366
- "eval_recall": 0.8523822513184216,
367
- "eval_runtime": 5.0693,
368
- "eval_samples_per_second": 78.709,
369
- "eval_steps_per_second": 9.863,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 4.0254364013671875,
375
  "learning_rate": 0.0,
376
- "loss": 0.1229,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8822055137844611,
382
- "eval_f1": 0.8589543987905864,
383
- "eval_loss": 0.32526540756225586,
384
- "eval_precision": 0.8564068100358423,
385
- "eval_recall": 0.8616566648481543,
386
- "eval_runtime": 5.0827,
387
- "eval_samples_per_second": 78.502,
388
- "eval_steps_per_second": 9.837,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
- "train_loss": 0.21670899273919278,
396
- "train_runtime": 1925.8785,
397
- "train_samples_per_second": 37.78,
398
- "train_steps_per_second": 1.267
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 3.532687187194824,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.551,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7243107769423559,
21
+ "eval_f1": 0.6144317942230656,
22
+ "eval_loss": 0.5008909106254578,
23
+ "eval_precision": 0.65566534914361,
24
+ "eval_recall": 0.6074286233860702,
25
+ "eval_runtime": 1.7719,
26
+ "eval_samples_per_second": 225.176,
27
+ "eval_steps_per_second": 28.218,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.3572421073913574,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4528,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7819548872180451,
40
+ "eval_f1": 0.7578021978021978,
41
+ "eval_loss": 0.4117695391178131,
42
+ "eval_precision": 0.7486942070275404,
43
+ "eval_recall": 0.7857337697763229,
44
+ "eval_runtime": 1.781,
45
+ "eval_samples_per_second": 224.038,
46
+ "eval_steps_per_second": 28.075,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 2.5434460639953613,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3588,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8521303258145363,
59
+ "eval_f1": 0.8063505120717311,
60
+ "eval_loss": 0.3427870571613312,
61
+ "eval_precision": 0.844033177433499,
62
+ "eval_recall": 0.7853700672849608,
63
+ "eval_runtime": 1.7815,
64
+ "eval_samples_per_second": 223.975,
65
+ "eval_steps_per_second": 28.067,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 4.183424949645996,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.3192,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8546365914786967,
78
+ "eval_f1": 0.8246499363520641,
79
+ "eval_loss": 0.31169387698173523,
80
+ "eval_precision": 0.8246499363520641,
81
+ "eval_recall": 0.8246499363520641,
82
+ "eval_runtime": 1.8089,
83
+ "eval_samples_per_second": 220.578,
84
+ "eval_steps_per_second": 27.641,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 1.7957929372787476,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2714,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8671679197994987,
97
+ "eval_f1": 0.8446181767415888,
98
+ "eval_loss": 0.3037036657333374,
99
+ "eval_precision": 0.835902201887332,
100
+ "eval_recall": 0.8560192762320422,
101
+ "eval_runtime": 1.8052,
102
+ "eval_samples_per_second": 221.025,
103
+ "eval_steps_per_second": 27.697,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 3.427933692932129,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.257,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8771929824561403,
116
+ "eval_f1": 0.8475258334958082,
117
+ "eval_loss": 0.2832619249820709,
118
+ "eval_precision": 0.8591828192414193,
119
+ "eval_recall": 0.8381069285324605,
120
+ "eval_runtime": 1.8047,
121
+ "eval_samples_per_second": 221.086,
122
+ "eval_steps_per_second": 27.705,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 1.5598583221435547,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2405,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.8847117794486216,
135
+ "eval_f1": 0.8616171059774413,
136
+ "eval_loss": 0.28606978058815,
137
+ "eval_precision": 0.859873949579832,
138
+ "eval_recall": 0.8634297144935443,
139
+ "eval_runtime": 1.8066,
140
+ "eval_samples_per_second": 220.859,
141
+ "eval_steps_per_second": 27.677,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 1.1964571475982666,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2163,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
  "eval_accuracy": 0.8796992481203008,
154
+ "eval_f1": 0.8556004584112431,
155
+ "eval_loss": 0.295376181602478,
156
+ "eval_precision": 0.8538865546218487,
157
+ "eval_recall": 0.85738316057465,
158
+ "eval_runtime": 1.8106,
159
+ "eval_samples_per_second": 220.364,
160
+ "eval_steps_per_second": 27.614,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 4.184772491455078,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2135,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.87468671679198,
173
+ "eval_f1": 0.8472902633190447,
174
+ "eval_loss": 0.29421547055244446,
175
+ "eval_precision": 0.8510272912927781,
176
+ "eval_recall": 0.8438352427714131,
177
+ "eval_runtime": 1.8081,
178
+ "eval_samples_per_second": 220.677,
179
+ "eval_steps_per_second": 27.654,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 4.817326545715332,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.2001,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8822055137844611,
192
+ "eval_f1": 0.8537492688633261,
193
+ "eval_loss": 0.3002479076385498,
194
+ "eval_precision": 0.8656062850151329,
195
+ "eval_recall": 0.8441534824513548,
196
+ "eval_runtime": 1.8147,
197
+ "eval_samples_per_second": 219.877,
198
+ "eval_steps_per_second": 27.553,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 2.364151954650879,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1825,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8922305764411027,
211
+ "eval_f1": 0.8676331036823873,
212
+ "eval_loss": 0.30105578899383545,
213
+ "eval_precision": 0.8749292230261088,
214
+ "eval_recall": 0.8612474995453718,
215
+ "eval_runtime": 1.7869,
216
+ "eval_samples_per_second": 223.295,
217
+ "eval_steps_per_second": 27.982,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 4.355996608734131,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1765,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8897243107769424,
230
+ "eval_f1": 0.8656154317207594,
231
+ "eval_loss": 0.28576698899269104,
232
+ "eval_precision": 0.869546382820719,
233
+ "eval_recall": 0.861974904528096,
234
+ "eval_runtime": 1.7863,
235
+ "eval_samples_per_second": 223.361,
236
+ "eval_steps_per_second": 27.99,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 1.647818922996521,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1674,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8947368421052632,
249
+ "eval_f1": 0.8748655913978494,
250
+ "eval_loss": 0.29319024085998535,
251
+ "eval_precision": 0.86983032873807,
252
+ "eval_recall": 0.8805237315875614,
253
+ "eval_runtime": 1.7923,
254
+ "eval_samples_per_second": 222.613,
255
+ "eval_steps_per_second": 27.896,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 2.9686295986175537,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1597,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8872180451127819,
268
+ "eval_f1": 0.8668668668668669,
269
+ "eval_loss": 0.2937151789665222,
270
+ "eval_precision": 0.8598901098901099,
271
+ "eval_recall": 0.8752045826513912,
272
+ "eval_runtime": 1.8072,
273
+ "eval_samples_per_second": 220.782,
274
+ "eval_steps_per_second": 27.667,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 2.176563262939453,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1564,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8947368421052632,
287
+ "eval_f1": 0.8717238211879976,
288
+ "eval_loss": 0.2963174879550934,
289
+ "eval_precision": 0.8757194133300328,
290
+ "eval_recall": 0.8680214584469903,
291
+ "eval_runtime": 1.8185,
292
+ "eval_samples_per_second": 219.414,
293
+ "eval_steps_per_second": 27.495,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.7651334404945374,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.142,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
  "eval_accuracy": 0.8922305764411027,
306
+ "eval_f1": 0.8683279483657071,
307
+ "eval_loss": 0.302460253238678,
308
+ "eval_precision": 0.873366724738676,
309
+ "eval_recall": 0.863747954173486,
310
+ "eval_runtime": 1.8132,
311
+ "eval_samples_per_second": 220.059,
312
+ "eval_steps_per_second": 27.576,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.19514738023281097,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.143,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8897243107769424,
325
+ "eval_f1": 0.8682773109243698,
326
+ "eval_loss": 0.2950632870197296,
327
+ "eval_precision": 0.864855223259409,
328
+ "eval_recall": 0.8719767230405528,
329
+ "eval_runtime": 1.8087,
330
+ "eval_samples_per_second": 220.597,
331
+ "eval_steps_per_second": 27.644,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 0.15066123008728027,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1315,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8822055137844611,
344
+ "eval_f1": 0.858259325044405,
345
+ "eval_loss": 0.3012860417366028,
346
+ "eval_precision": 0.8573798178418481,
347
+ "eval_recall": 0.8591562102200401,
348
+ "eval_runtime": 1.8058,
349
+ "eval_samples_per_second": 220.949,
350
+ "eval_steps_per_second": 27.688,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.04642534255981445,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1378,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8872180451127819,
363
+ "eval_f1": 0.8629148629148629,
364
+ "eval_loss": 0.30377450585365295,
365
+ "eval_precision": 0.8657894736842104,
366
+ "eval_recall": 0.860201854882706,
367
+ "eval_runtime": 1.8121,
368
+ "eval_samples_per_second": 220.187,
369
+ "eval_steps_per_second": 27.592,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.5789785385131836,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1333,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8847117794486216,
382
+ "eval_f1": 0.8609292598654301,
383
+ "eval_loss": 0.29736196994781494,
384
+ "eval_precision": 0.8609292598654301,
385
+ "eval_recall": 0.8609292598654301,
386
+ "eval_runtime": 1.814,
387
+ "eval_samples_per_second": 219.959,
388
+ "eval_steps_per_second": 27.564,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
+ "train_loss": 0.23053875125822473,
396
+ "train_runtime": 628.042,
397
+ "train_samples_per_second": 115.852,
398
+ "train_steps_per_second": 3.885
399
  }
400
  ],
401
  "logging_steps": 500,