apwic commited on
Commit
305daf6
1 Parent(s): a678223

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9030662710187932,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8897243107769424,
5
- "eval_f1": 0.8649122807017544,
6
- "eval_loss": 0.2823803424835205,
7
- "eval_precision": 0.8710116366366366,
8
- "eval_recall": 0.8594744498999818,
9
- "eval_runtime": 4.9357,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 80.84,
12
- "eval_steps_per_second": 10.13,
13
- "f1": 0.8843184599146265,
14
- "precision": 0.8814837991934362,
15
- "recall": 0.8873297438745447,
16
- "train_loss": 0.23294218016452475,
17
- "train_runtime": 1905.4788,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 38.185,
20
- "train_steps_per_second": 1.281
21
  }
 
1
  {
2
+ "accuracy": 0.904055390702275,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8822055137844611,
5
+ "eval_f1": 0.858259325044405,
6
+ "eval_loss": 0.3389217257499695,
7
+ "eval_precision": 0.8573798178418481,
8
+ "eval_recall": 0.8591562102200401,
9
+ "eval_runtime": 1.811,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 220.315,
12
+ "eval_steps_per_second": 27.608,
13
+ "f1": 0.8862491460015474,
14
+ "precision": 0.881173503483252,
15
+ "recall": 0.8919373664542485,
16
+ "train_loss": 0.22970127551282038,
17
+ "train_runtime": 621.3103,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 117.107,
20
+ "train_steps_per_second": 3.927
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8897243107769424,
4
- "eval_f1": 0.8649122807017544,
5
- "eval_loss": 0.2823803424835205,
6
- "eval_precision": 0.8710116366366366,
7
- "eval_recall": 0.8594744498999818,
8
- "eval_runtime": 4.9357,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 80.84,
11
- "eval_steps_per_second": 10.13
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8822055137844611,
4
+ "eval_f1": 0.858259325044405,
5
+ "eval_loss": 0.3389217257499695,
6
+ "eval_precision": 0.8573798178418481,
7
+ "eval_recall": 0.8591562102200401,
8
+ "eval_runtime": 1.811,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 220.315,
11
+ "eval_steps_per_second": 27.608
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9030662710187932,
3
- "f1": 0.8843184599146265,
4
- "precision": 0.8814837991934362,
5
- "recall": 0.8873297438745447
6
  }
 
1
  {
2
+ "accuracy": 0.904055390702275,
3
+ "f1": 0.8862491460015474,
4
+ "precision": 0.881173503483252,
5
+ "recall": 0.8919373664542485
6
  }
predict_results.txt CHANGED
@@ -13,14 +13,14 @@ index prediction
13
  11 1
14
  12 1
15
  13 0
16
- 14 0
17
  15 1
18
  16 1
19
  17 1
20
  18 1
21
  19 1
22
  20 1
23
- 21 0
24
  22 1
25
  23 1
26
  24 1
@@ -30,13 +30,13 @@ index prediction
30
  28 1
31
  29 1
32
  30 1
33
- 31 0
34
  32 1
35
  33 1
36
  34 1
37
  35 1
38
  36 1
39
- 37 0
40
  38 1
41
  39 1
42
  40 1
@@ -58,7 +58,7 @@ index prediction
58
  56 1
59
  57 1
60
  58 1
61
- 59 0
62
  60 1
63
  61 1
64
  62 0
@@ -75,7 +75,7 @@ index prediction
75
  73 1
76
  74 1
77
  75 1
78
- 76 0
79
  77 1
80
  78 0
81
  79 1
@@ -94,7 +94,7 @@ index prediction
94
  92 1
95
  93 1
96
  94 1
97
- 95 1
98
  96 1
99
  97 1
100
  98 1
@@ -105,10 +105,10 @@ index prediction
105
  103 1
106
  104 1
107
  105 0
108
- 106 1
109
  107 1
110
  108 1
111
- 109 1
112
  110 1
113
  111 1
114
  112 1
@@ -116,7 +116,7 @@ index prediction
116
  114 1
117
  115 1
118
  116 1
119
- 117 1
120
  118 1
121
  119 1
122
  120 1
@@ -128,7 +128,7 @@ index prediction
128
  126 1
129
  127 1
130
  128 1
131
- 129 0
132
  130 1
133
  131 1
134
  132 1
@@ -144,16 +144,16 @@ index prediction
144
  142 1
145
  143 1
146
  144 1
147
- 145 1
148
  146 1
149
  147 1
150
  148 1
151
  149 1
152
  150 1
153
- 151 1
154
  152 1
155
  153 1
156
- 154 0
157
  155 1
158
  156 0
159
  157 1
@@ -162,7 +162,7 @@ index prediction
162
  160 1
163
  161 0
164
  162 1
165
- 163 1
166
  164 1
167
  165 1
168
  166 0
@@ -186,10 +186,10 @@ index prediction
186
  184 1
187
  185 1
188
  186 1
189
- 187 1
190
  188 1
191
  189 0
192
- 190 1
193
  191 1
194
  192 1
195
  193 0
@@ -217,7 +217,7 @@ index prediction
217
  215 0
218
  216 1
219
  217 0
220
- 218 0
221
  219 1
222
  220 1
223
  221 1
@@ -232,7 +232,7 @@ index prediction
232
  230 0
233
  231 1
234
  232 1
235
- 233 0
236
  234 1
237
  235 1
238
  236 1
@@ -272,7 +272,7 @@ index prediction
272
  270 1
273
  271 0
274
  272 1
275
- 273 0
276
  274 1
277
  275 1
278
  276 1
@@ -286,9 +286,9 @@ index prediction
286
  284 1
287
  285 1
288
  286 1
289
- 287 0
290
  288 1
291
- 289 0
292
  290 1
293
  291 1
294
  292 1
@@ -302,7 +302,7 @@ index prediction
302
  300 0
303
  301 0
304
  302 0
305
- 303 0
306
  304 0
307
  305 0
308
  306 0
@@ -312,7 +312,7 @@ index prediction
312
  310 0
313
  311 0
314
  312 0
315
- 313 1
316
  314 0
317
  315 0
318
  316 0
@@ -338,10 +338,10 @@ index prediction
338
  336 0
339
  337 0
340
  338 0
341
- 339 1
342
  340 0
343
  341 0
344
- 342 1
345
  343 0
346
  344 0
347
  345 0
@@ -363,7 +363,7 @@ index prediction
363
  361 0
364
  362 0
365
  363 0
366
- 364 1
367
  365 0
368
  366 0
369
  367 0
@@ -377,7 +377,7 @@ index prediction
377
  375 0
378
  376 0
379
  377 0
380
- 378 1
381
  379 0
382
  380 0
383
  381 0
@@ -419,9 +419,9 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 0
423
  421 0
424
- 422 0
425
  423 0
426
  424 0
427
  425 0
@@ -463,7 +463,7 @@ index prediction
463
  461 0
464
  462 0
465
  463 0
466
- 464 1
467
  465 0
468
  466 0
469
  467 0
@@ -472,7 +472,7 @@ index prediction
472
  470 0
473
  471 0
474
  472 1
475
- 473 0
476
  474 1
477
  475 0
478
  476 1
@@ -538,7 +538,7 @@ index prediction
538
  536 0
539
  537 0
540
  538 0
541
- 539 0
542
  540 0
543
  541 0
544
  542 0
@@ -601,7 +601,7 @@ index prediction
601
  599 0
602
  600 0
603
  601 0
604
- 602 0
605
  603 0
606
  604 0
607
  605 1
@@ -621,7 +621,7 @@ index prediction
621
  619 0
622
  620 0
623
  621 0
624
- 622 0
625
  623 0
626
  624 0
627
  625 0
@@ -672,8 +672,8 @@ index prediction
672
  670 0
673
  671 0
674
  672 0
675
- 673 0
676
- 674 0
677
  675 0
678
  676 0
679
  677 0
@@ -688,7 +688,7 @@ index prediction
688
  686 0
689
  687 0
690
  688 0
691
- 689 1
692
  690 0
693
  691 0
694
  692 0
@@ -740,7 +740,7 @@ index prediction
740
  738 0
741
  739 0
742
  740 0
743
- 741 0
744
  742 0
745
  743 0
746
  744 0
@@ -784,7 +784,7 @@ index prediction
784
  782 0
785
  783 0
786
  784 0
787
- 785 0
788
  786 0
789
  787 0
790
  788 0
@@ -797,19 +797,19 @@ index prediction
797
  795 1
798
  796 0
799
  797 0
800
- 798 1
801
- 799 1
802
  800 0
803
  801 0
804
- 802 0
805
  803 0
806
  804 0
807
  805 0
808
  806 0
809
  807 0
810
  808 0
811
- 809 0
812
- 810 1
813
  811 0
814
  812 0
815
  813 0
@@ -831,7 +831,7 @@ index prediction
831
  829 0
832
  830 0
833
  831 1
834
- 832 1
835
  833 0
836
  834 0
837
  835 0
@@ -859,7 +859,7 @@ index prediction
859
  857 1
860
  858 0
861
  859 0
862
- 860 0
863
  861 0
864
  862 0
865
  863 0
@@ -885,7 +885,7 @@ index prediction
885
  883 0
886
  884 0
887
  885 0
888
- 886 1
889
  887 0
890
  888 0
891
  889 0
@@ -912,7 +912,7 @@ index prediction
912
  910 0
913
  911 0
914
  912 1
915
- 913 1
916
  914 0
917
  915 0
918
  916 0
@@ -923,12 +923,12 @@ index prediction
923
  921 1
924
  922 0
925
  923 0
926
- 924 1
927
  925 0
928
  926 1
929
  927 0
930
  928 0
931
- 929 0
932
  930 0
933
  931 0
934
  932 0
@@ -946,7 +946,7 @@ index prediction
946
  944 0
947
  945 1
948
  946 0
949
- 947 0
950
  948 0
951
  949 0
952
  950 0
@@ -966,7 +966,7 @@ index prediction
966
  964 0
967
  965 0
968
  966 0
969
- 967 0
970
  968 0
971
  969 0
972
  970 0
 
13
  11 1
14
  12 1
15
  13 0
16
+ 14 1
17
  15 1
18
  16 1
19
  17 1
20
  18 1
21
  19 1
22
  20 1
23
+ 21 1
24
  22 1
25
  23 1
26
  24 1
 
30
  28 1
31
  29 1
32
  30 1
33
+ 31 1
34
  32 1
35
  33 1
36
  34 1
37
  35 1
38
  36 1
39
+ 37 1
40
  38 1
41
  39 1
42
  40 1
 
58
  56 1
59
  57 1
60
  58 1
61
+ 59 1
62
  60 1
63
  61 1
64
  62 0
 
75
  73 1
76
  74 1
77
  75 1
78
+ 76 1
79
  77 1
80
  78 0
81
  79 1
 
94
  92 1
95
  93 1
96
  94 1
97
+ 95 0
98
  96 1
99
  97 1
100
  98 1
 
105
  103 1
106
  104 1
107
  105 0
108
+ 106 0
109
  107 1
110
  108 1
111
+ 109 0
112
  110 1
113
  111 1
114
  112 1
 
116
  114 1
117
  115 1
118
  116 1
119
+ 117 0
120
  118 1
121
  119 1
122
  120 1
 
128
  126 1
129
  127 1
130
  128 1
131
+ 129 1
132
  130 1
133
  131 1
134
  132 1
 
144
  142 1
145
  143 1
146
  144 1
147
+ 145 0
148
  146 1
149
  147 1
150
  148 1
151
  149 1
152
  150 1
153
+ 151 0
154
  152 1
155
  153 1
156
+ 154 1
157
  155 1
158
  156 0
159
  157 1
 
162
  160 1
163
  161 0
164
  162 1
165
+ 163 0
166
  164 1
167
  165 1
168
  166 0
 
186
  184 1
187
  185 1
188
  186 1
189
+ 187 0
190
  188 1
191
  189 0
192
+ 190 0
193
  191 1
194
  192 1
195
  193 0
 
217
  215 0
218
  216 1
219
  217 0
220
+ 218 1
221
  219 1
222
  220 1
223
  221 1
 
232
  230 0
233
  231 1
234
  232 1
235
+ 233 1
236
  234 1
237
  235 1
238
  236 1
 
272
  270 1
273
  271 0
274
  272 1
275
+ 273 1
276
  274 1
277
  275 1
278
  276 1
 
286
  284 1
287
  285 1
288
  286 1
289
+ 287 1
290
  288 1
291
+ 289 1
292
  290 1
293
  291 1
294
  292 1
 
302
  300 0
303
  301 0
304
  302 0
305
+ 303 1
306
  304 0
307
  305 0
308
  306 0
 
312
  310 0
313
  311 0
314
  312 0
315
+ 313 0
316
  314 0
317
  315 0
318
  316 0
 
338
  336 0
339
  337 0
340
  338 0
341
+ 339 0
342
  340 0
343
  341 0
344
+ 342 0
345
  343 0
346
  344 0
347
  345 0
 
363
  361 0
364
  362 0
365
  363 0
366
+ 364 0
367
  365 0
368
  366 0
369
  367 0
 
377
  375 0
378
  376 0
379
  377 0
380
+ 378 0
381
  379 0
382
  380 0
383
  381 0
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 1
423
  421 0
424
+ 422 1
425
  423 0
426
  424 0
427
  425 0
 
463
  461 0
464
  462 0
465
  463 0
466
+ 464 0
467
  465 0
468
  466 0
469
  467 0
 
472
  470 0
473
  471 0
474
  472 1
475
+ 473 1
476
  474 1
477
  475 0
478
  476 1
 
538
  536 0
539
  537 0
540
  538 0
541
+ 539 1
542
  540 0
543
  541 0
544
  542 0
 
601
  599 0
602
  600 0
603
  601 0
604
+ 602 1
605
  603 0
606
  604 0
607
  605 1
 
621
  619 0
622
  620 0
623
  621 0
624
+ 622 1
625
  623 0
626
  624 0
627
  625 0
 
672
  670 0
673
  671 0
674
  672 0
675
+ 673 1
676
+ 674 1
677
  675 0
678
  676 0
679
  677 0
 
688
  686 0
689
  687 0
690
  688 0
691
+ 689 0
692
  690 0
693
  691 0
694
  692 0
 
740
  738 0
741
  739 0
742
  740 0
743
+ 741 1
744
  742 0
745
  743 0
746
  744 0
 
784
  782 0
785
  783 0
786
  784 0
787
+ 785 1
788
  786 0
789
  787 0
790
  788 0
 
797
  795 1
798
  796 0
799
  797 0
800
+ 798 0
801
+ 799 0
802
  800 0
803
  801 0
804
+ 802 1
805
  803 0
806
  804 0
807
  805 0
808
  806 0
809
  807 0
810
  808 0
811
+ 809 1
812
+ 810 0
813
  811 0
814
  812 0
815
  813 0
 
831
  829 0
832
  830 0
833
  831 1
834
+ 832 0
835
  833 0
836
  834 0
837
  835 0
 
859
  857 1
860
  858 0
861
  859 0
862
+ 860 1
863
  861 0
864
  862 0
865
  863 0
 
885
  883 0
886
  884 0
887
  885 0
888
+ 886 0
889
  887 0
890
  888 0
891
  889 0
 
912
  910 0
913
  911 0
914
  912 1
915
+ 913 0
916
  914 0
917
  915 0
918
  916 0
 
923
  921 1
924
  922 0
925
  923 0
926
+ 924 0
927
  925 0
928
  926 1
929
  927 0
930
  928 0
931
+ 929 1
932
  930 0
933
  931 0
934
  932 0
 
946
  944 0
947
  945 1
948
  946 0
949
+ 947 1
950
  948 0
951
  949 0
952
  950 0
 
966
  964 0
967
  965 0
968
  966 0
969
+ 967 1
970
  968 0
971
  969 0
972
  970 0
runs/Jun03_14-28-41_a358b85c7679/events.out.tfevents.1717425557.a358b85c7679.140888.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa2ba2793098dc75e6508b9c16edb8322d39b79e4617f6dc1fa57a06ee056028
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.23294218016452475,
4
- "train_runtime": 1905.4788,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 38.185,
7
- "train_steps_per_second": 1.281
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.22970127551282038,
4
+ "train_runtime": 621.3103,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 117.107,
7
+ "train_steps_per_second": 3.927
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.96969747543335,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5526,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7142857142857143,
21
- "eval_f1": 0.6246286393345217,
22
- "eval_loss": 0.5089075565338135,
23
- "eval_precision": 0.6432360742705571,
24
- "eval_recall": 0.6178396072013094,
25
- "eval_runtime": 5.1308,
26
- "eval_samples_per_second": 77.766,
27
- "eval_steps_per_second": 9.745,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.059352159500122,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4578,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7994987468671679,
40
- "eval_f1": 0.771804403774664,
41
- "eval_loss": 0.4193674325942993,
42
- "eval_precision": 0.7619509396853147,
43
- "eval_recall": 0.7906437534097108,
44
- "eval_runtime": 5.0426,
45
- "eval_samples_per_second": 79.125,
46
- "eval_steps_per_second": 9.915,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 3.294614553451538,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3632,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8370927318295739,
59
- "eval_f1": 0.7866573438078395,
60
- "eval_loss": 0.3468063175678253,
61
- "eval_precision": 0.8221645717626425,
62
- "eval_recall": 0.7672304055282779,
63
- "eval_runtime": 5.0451,
64
- "eval_samples_per_second": 79.086,
65
- "eval_steps_per_second": 9.911,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 3.894381046295166,
71
  "learning_rate": 4e-05,
72
- "loss": 0.3063,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8822055137844611,
78
- "eval_f1": 0.8589543987905864,
79
- "eval_loss": 0.2975314259529114,
80
- "eval_precision": 0.8564068100358423,
81
- "eval_recall": 0.8616566648481543,
82
- "eval_runtime": 5.045,
83
- "eval_samples_per_second": 79.087,
84
- "eval_steps_per_second": 9.911,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.5420114994049072,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2771,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8721804511278195,
97
- "eval_f1": 0.8524204953403198,
98
- "eval_loss": 0.3194977939128876,
99
- "eval_precision": 0.8409052784611943,
100
- "eval_recall": 0.8695671940352792,
101
- "eval_runtime": 5.0554,
102
- "eval_samples_per_second": 78.926,
103
- "eval_steps_per_second": 9.89,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 2.310750961303711,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2463,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8771929824561403,
116
- "eval_f1": 0.8536612749904566,
117
- "eval_loss": 0.28968048095703125,
118
- "eval_precision": 0.8496330709593418,
119
- "eval_recall": 0.8581105655573741,
120
- "eval_runtime": 5.0495,
121
- "eval_samples_per_second": 79.017,
122
- "eval_steps_per_second": 9.902,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 2.563045024871826,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2317,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8847117794486216,
135
- "eval_f1": 0.8609292598654301,
136
- "eval_loss": 0.27178412675857544,
137
- "eval_precision": 0.8609292598654301,
138
- "eval_recall": 0.8609292598654301,
139
- "eval_runtime": 5.0509,
140
- "eval_samples_per_second": 78.995,
141
- "eval_steps_per_second": 9.899,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 1.676988959312439,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2182,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8847117794486216,
154
- "eval_f1": 0.8654398826979472,
155
- "eval_loss": 0.26825374364852905,
156
- "eval_precision": 0.8556454369374502,
157
- "eval_recall": 0.8784324422622295,
158
- "eval_runtime": 5.0526,
159
- "eval_samples_per_second": 78.969,
160
- "eval_steps_per_second": 9.896,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 5.442895412445068,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.2065,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8696741854636592,
173
- "eval_f1": 0.844327731092437,
174
- "eval_loss": 0.27725929021835327,
175
- "eval_precision": 0.8411320530352577,
176
- "eval_recall": 0.8477905073649754,
177
- "eval_runtime": 5.0455,
178
- "eval_samples_per_second": 79.08,
179
- "eval_steps_per_second": 9.91,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 4.095264434814453,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.2012,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8822055137844611,
192
- "eval_f1": 0.852937255424767,
193
- "eval_loss": 0.28407934308052063,
194
- "eval_precision": 0.8674217731421121,
195
- "eval_recall": 0.8416530278232406,
196
- "eval_runtime": 5.0513,
197
- "eval_samples_per_second": 78.99,
198
- "eval_steps_per_second": 9.898,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 7.4546098709106445,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1944,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8847117794486216,
211
- "eval_f1": 0.8616171059774413,
212
- "eval_loss": 0.27333080768585205,
213
- "eval_precision": 0.859873949579832,
214
- "eval_recall": 0.8634297144935443,
215
- "eval_runtime": 5.0526,
216
- "eval_samples_per_second": 78.968,
217
- "eval_steps_per_second": 9.896,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 8.82780647277832,
223
  "learning_rate": 2e-05,
224
- "loss": 0.176,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8972431077694235,
230
- "eval_f1": 0.8724195749658803,
231
- "eval_loss": 0.27092599868774414,
232
- "eval_precision": 0.8848766823362741,
233
- "eval_recall": 0.8622931442080378,
234
- "eval_runtime": 5.0447,
235
- "eval_samples_per_second": 79.094,
236
- "eval_steps_per_second": 9.911,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 3.990729331970215,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.168,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8947368421052632,
249
- "eval_f1": 0.8736504011098378,
250
- "eval_loss": 0.26514801383018494,
251
- "eval_precision": 0.8718487394957983,
252
- "eval_recall": 0.8755228223313329,
253
- "eval_runtime": 5.0943,
254
- "eval_samples_per_second": 78.323,
255
- "eval_steps_per_second": 9.815,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 2.973822593688965,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1644,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8922305764411027,
268
- "eval_f1": 0.8727838950061173,
269
- "eval_loss": 0.2710956037044525,
270
- "eval_precision": 0.8656898656898657,
271
- "eval_recall": 0.8812511365702855,
272
- "eval_runtime": 5.0742,
273
- "eval_samples_per_second": 78.633,
274
- "eval_steps_per_second": 9.854,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 2.690046548843384,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1541,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8922305764411027,
287
- "eval_f1": 0.8683279483657071,
288
- "eval_loss": 0.2790246605873108,
289
- "eval_precision": 0.873366724738676,
290
- "eval_recall": 0.863747954173486,
291
- "eval_runtime": 5.055,
292
- "eval_samples_per_second": 78.932,
293
- "eval_steps_per_second": 9.891,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 1.9607453346252441,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1574,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8897243107769424,
306
- "eval_f1": 0.8656154317207594,
307
- "eval_loss": 0.27665647864341736,
308
- "eval_precision": 0.869546382820719,
309
- "eval_recall": 0.861974904528096,
310
- "eval_runtime": 5.06,
311
- "eval_samples_per_second": 78.853,
312
- "eval_steps_per_second": 9.881,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.14830851554870605,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1508,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8897243107769424,
325
- "eval_f1": 0.8676337535436396,
326
- "eval_loss": 0.2825946509838104,
327
- "eval_precision": 0.8658613445378152,
328
- "eval_recall": 0.8694762684124386,
329
- "eval_runtime": 5.0609,
330
- "eval_samples_per_second": 78.84,
331
- "eval_steps_per_second": 9.88,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 1.0990123748779297,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1463,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8872180451127819,
344
- "eval_f1": 0.8614765038536611,
345
- "eval_loss": 0.2823769748210907,
346
- "eval_precision": 0.8686536646744258,
347
- "eval_recall": 0.8552009456264775,
348
- "eval_runtime": 5.0926,
349
- "eval_samples_per_second": 78.349,
350
- "eval_steps_per_second": 9.818,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.10962895303964615,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1467,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8847117794486216,
363
- "eval_f1": 0.8572517421602788,
364
- "eval_loss": 0.2876257598400116,
365
- "eval_precision": 0.8679426449878376,
366
- "eval_recall": 0.8484269867248591,
367
- "eval_runtime": 5.0519,
368
- "eval_samples_per_second": 78.981,
369
- "eval_steps_per_second": 9.897,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 4.516838550567627,
375
  "learning_rate": 0.0,
376
- "loss": 0.1399,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8897243107769424,
382
- "eval_f1": 0.8649122807017544,
383
- "eval_loss": 0.2823803424835205,
384
- "eval_precision": 0.8710116366366366,
385
- "eval_recall": 0.8594744498999818,
386
- "eval_runtime": 5.0556,
387
- "eval_samples_per_second": 78.922,
388
- "eval_steps_per_second": 9.89,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
- "train_loss": 0.23294218016452475,
396
- "train_runtime": 1905.4788,
397
- "train_samples_per_second": 38.185,
398
- "train_steps_per_second": 1.281
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 5.112319469451904,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5509,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7393483709273183,
21
+ "eval_f1": 0.6507070707070707,
22
+ "eval_loss": 0.4983255863189697,
23
+ "eval_precision": 0.6800605637083625,
24
+ "eval_recall": 0.6405710129114385,
25
+ "eval_runtime": 1.7657,
26
+ "eval_samples_per_second": 225.971,
27
+ "eval_steps_per_second": 28.317,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.6866044998168945,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4511,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7769423558897243,
40
+ "eval_f1": 0.7593078346448687,
41
+ "eval_loss": 0.4377373456954956,
42
+ "eval_precision": 0.7546743295019157,
43
+ "eval_recall": 0.8021913075104565,
44
+ "eval_runtime": 1.769,
45
+ "eval_samples_per_second": 225.555,
46
+ "eval_steps_per_second": 28.265,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 3.584764242172241,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.368,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8571428571428571,
59
+ "eval_f1": 0.8196102381877741,
60
+ "eval_loss": 0.32603567838668823,
61
+ "eval_precision": 0.8381270903010034,
62
+ "eval_recall": 0.8064193489725404,
63
+ "eval_runtime": 1.7715,
64
+ "eval_samples_per_second": 225.23,
65
+ "eval_steps_per_second": 28.224,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 2.8483095169067383,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.3019,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8646616541353384,
78
+ "eval_f1": 0.8333281762485303,
79
+ "eval_loss": 0.30364951491355896,
80
+ "eval_precision": 0.8410471369819678,
81
+ "eval_recall": 0.8267412256773959,
82
+ "eval_runtime": 1.7702,
83
+ "eval_samples_per_second": 225.393,
84
+ "eval_steps_per_second": 28.245,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 2.774143934249878,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2668,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8671679197994987,
97
+ "eval_f1": 0.8424651921601347,
98
+ "eval_loss": 0.31921207904815674,
99
+ "eval_precision": 0.8372140762463343,
100
+ "eval_recall": 0.8485179123476996,
101
+ "eval_runtime": 1.7714,
102
+ "eval_samples_per_second": 225.248,
103
+ "eval_steps_per_second": 28.227,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 4.2327117919921875,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2471,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8621553884711779,
116
+ "eval_f1": 0.8380263497804185,
117
+ "eval_loss": 0.30589351058006287,
118
+ "eval_precision": 0.830503344095941,
119
+ "eval_recall": 0.8474722676850337,
120
+ "eval_runtime": 1.7732,
121
+ "eval_samples_per_second": 225.015,
122
+ "eval_steps_per_second": 28.197,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.5115749835968018,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2422,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.87468671679198,
135
+ "eval_f1": 0.8524146298159436,
136
+ "eval_loss": 0.2949831783771515,
137
+ "eval_precision": 0.8451250578971746,
138
+ "eval_recall": 0.8613384251682124,
139
+ "eval_runtime": 1.7731,
140
+ "eval_samples_per_second": 225.024,
141
+ "eval_steps_per_second": 28.198,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 1.2918312549591064,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2258,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8721804511278195,
154
+ "eval_f1": 0.8454251965513313,
155
+ "eval_loss": 0.29280924797058105,
156
+ "eval_precision": 0.8463049835506276,
157
+ "eval_recall": 0.8445626477541371,
158
+ "eval_runtime": 1.7799,
159
+ "eval_samples_per_second": 224.171,
160
+ "eval_steps_per_second": 28.092,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 5.160737037658691,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2054,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8796992481203008,
173
+ "eval_f1": 0.8533986527862829,
174
+ "eval_loss": 0.30492648482322693,
175
+ "eval_precision": 0.8572003218020917,
176
+ "eval_recall": 0.8498817966903074,
177
+ "eval_runtime": 1.779,
178
+ "eval_samples_per_second": 224.288,
179
+ "eval_steps_per_second": 28.106,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 3.917464017868042,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.2009,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.87468671679198,
192
+ "eval_f1": 0.8488361520276414,
193
+ "eval_loss": 0.30127042531967163,
194
+ "eval_precision": 0.8488361520276414,
195
+ "eval_recall": 0.8488361520276414,
196
+ "eval_runtime": 1.7757,
197
+ "eval_samples_per_second": 224.7,
198
+ "eval_steps_per_second": 28.158,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 6.667805194854736,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1755,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8822055137844611,
211
+ "eval_f1": 0.858259325044405,
212
+ "eval_loss": 0.30701279640197754,
213
+ "eval_precision": 0.8573798178418481,
214
+ "eval_recall": 0.8591562102200401,
215
+ "eval_runtime": 1.7942,
216
+ "eval_samples_per_second": 222.38,
217
+ "eval_steps_per_second": 27.867,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 8.611730575561523,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1821,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8822055137844611,
230
+ "eval_f1": 0.8568221901555235,
231
+ "eval_loss": 0.2995355427265167,
232
+ "eval_precision": 0.8596491228070176,
233
+ "eval_recall": 0.8541553009638116,
234
+ "eval_runtime": 1.7796,
235
+ "eval_samples_per_second": 224.202,
236
+ "eval_steps_per_second": 28.095,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 2.71295428276062,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1652,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8847117794486216,
249
+ "eval_f1": 0.866029197080292,
250
+ "eval_loss": 0.3272043764591217,
251
+ "eval_precision": 0.8552631578947368,
252
+ "eval_recall": 0.8809328968903437,
253
+ "eval_runtime": 1.7775,
254
+ "eval_samples_per_second": 224.467,
255
+ "eval_steps_per_second": 28.129,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 5.373868942260742,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1566,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8897243107769424,
268
+ "eval_f1": 0.8718540145985401,
269
+ "eval_loss": 0.33357149362564087,
270
+ "eval_precision": 0.8609022556390977,
271
+ "eval_recall": 0.886979450809238,
272
+ "eval_runtime": 1.7836,
273
+ "eval_samples_per_second": 223.703,
274
+ "eval_steps_per_second": 28.033,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 5.369639873504639,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1634,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8847117794486216,
287
+ "eval_f1": 0.8622899159663866,
288
+ "eval_loss": 0.314995676279068,
289
+ "eval_precision": 0.8589244307033712,
290
+ "eval_recall": 0.8659301691216585,
291
+ "eval_runtime": 1.777,
292
+ "eval_samples_per_second": 224.539,
293
+ "eval_steps_per_second": 28.138,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 6.779192924499512,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1496,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8922305764411027,
306
+ "eval_f1": 0.8696722245432793,
307
+ "eval_loss": 0.3320792317390442,
308
+ "eval_precision": 0.8706135006701596,
309
+ "eval_recall": 0.8687488634297145,
310
+ "eval_runtime": 1.7833,
311
+ "eval_samples_per_second": 223.741,
312
+ "eval_steps_per_second": 28.038,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 1.1515932083129883,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1355,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8847117794486216,
325
+ "eval_f1": 0.8616171059774413,
326
+ "eval_loss": 0.32759982347488403,
327
+ "eval_precision": 0.859873949579832,
328
+ "eval_recall": 0.8634297144935443,
329
+ "eval_runtime": 1.7782,
330
+ "eval_samples_per_second": 224.387,
331
+ "eval_steps_per_second": 28.119,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 1.6571087837219238,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1477,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8796992481203008,
344
+ "eval_f1": 0.8563025210084034,
345
+ "eval_loss": 0.33653610944747925,
346
+ "eval_precision": 0.8529936381473334,
347
+ "eval_recall": 0.8598836152027641,
348
+ "eval_runtime": 1.7851,
349
+ "eval_samples_per_second": 223.518,
350
+ "eval_steps_per_second": 28.01,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 2.6701011657714844,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1317,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8822055137844611,
363
+ "eval_f1": 0.858259325044405,
364
+ "eval_loss": 0.3385031819343567,
365
+ "eval_precision": 0.8573798178418481,
366
+ "eval_recall": 0.8591562102200401,
367
+ "eval_runtime": 1.7765,
368
+ "eval_samples_per_second": 224.597,
369
+ "eval_steps_per_second": 28.145,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.197312593460083,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1267,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8822055137844611,
382
+ "eval_f1": 0.858259325044405,
383
+ "eval_loss": 0.3389217257499695,
384
+ "eval_precision": 0.8573798178418481,
385
+ "eval_recall": 0.8591562102200401,
386
+ "eval_runtime": 1.7779,
387
+ "eval_samples_per_second": 224.423,
388
+ "eval_steps_per_second": 28.123,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
+ "train_loss": 0.22970127551282038,
396
+ "train_runtime": 621.3103,
397
+ "train_samples_per_second": 117.107,
398
+ "train_steps_per_second": 3.927
399
  }
400
  ],
401
  "logging_steps": 500,