End of training
Browse files- README.md +2 -0
- all_results.json +13 -13
- eval_results.json +9 -9
- predict_results.txt +83 -83
- runs/May14_19-19-36_indolem-petl-vm/events.out.tfevents.1715717191.indolem-petl-vm.306885.1 +3 -0
- train_results.json +5 -5
- trainer_state.json +384 -23
README.md
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
---
|
|
|
|
|
2 |
license: mit
|
3 |
base_model: indolem/indobert-base-uncased
|
4 |
tags:
|
|
|
1 |
---
|
2 |
+
language:
|
3 |
+
- id
|
4 |
license: mit
|
5 |
base_model: indolem/indobert-base-uncased
|
6 |
tags:
|
all_results.json
CHANGED
@@ -1,17 +1,17 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime": 4.
|
9 |
"eval_samples": 399,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second": 10.
|
12 |
-
"train_loss": 0.
|
13 |
-
"train_runtime":
|
14 |
"train_samples": 3638,
|
15 |
-
"train_samples_per_second":
|
16 |
-
"train_steps_per_second":
|
17 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 20.0,
|
3 |
+
"eval_accuracy": 0.9047619047619048,
|
4 |
+
"eval_f1": 0.8878351186601172,
|
5 |
+
"eval_loss": 0.7535876035690308,
|
6 |
+
"eval_precision": 0.879776516905975,
|
7 |
+
"eval_recall": 0.8976177486815784,
|
8 |
+
"eval_runtime": 4.6553,
|
9 |
"eval_samples": 399,
|
10 |
+
"eval_samples_per_second": 85.708,
|
11 |
+
"eval_steps_per_second": 10.74,
|
12 |
+
"train_loss": 0.06173487283655855,
|
13 |
+
"train_runtime": 2765.1299,
|
14 |
"train_samples": 3638,
|
15 |
+
"train_samples_per_second": 26.313,
|
16 |
+
"train_steps_per_second": 0.882
|
17 |
}
|
eval_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime": 4.
|
9 |
"eval_samples": 399,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second": 10.
|
12 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 20.0,
|
3 |
+
"eval_accuracy": 0.9047619047619048,
|
4 |
+
"eval_f1": 0.8878351186601172,
|
5 |
+
"eval_loss": 0.7535876035690308,
|
6 |
+
"eval_precision": 0.879776516905975,
|
7 |
+
"eval_recall": 0.8976177486815784,
|
8 |
+
"eval_runtime": 4.6553,
|
9 |
"eval_samples": 399,
|
10 |
+
"eval_samples_per_second": 85.708,
|
11 |
+
"eval_steps_per_second": 10.74
|
12 |
}
|
predict_results.txt
CHANGED
@@ -1,20 +1,20 @@
|
|
1 |
index prediction
|
2 |
0 1
|
3 |
-
1
|
4 |
2 1
|
5 |
3 1
|
6 |
-
4
|
7 |
5 1
|
8 |
6 1
|
9 |
7 1
|
10 |
8 0
|
11 |
-
9
|
12 |
10 1
|
13 |
11 1
|
14 |
12 1
|
15 |
13 1
|
16 |
14 1
|
17 |
-
15
|
18 |
16 1
|
19 |
17 1
|
20 |
18 1
|
@@ -23,12 +23,12 @@ index prediction
|
|
23 |
21 1
|
24 |
22 1
|
25 |
23 1
|
26 |
-
24
|
27 |
-
25
|
28 |
26 1
|
29 |
-
27
|
30 |
28 1
|
31 |
-
29
|
32 |
30 1
|
33 |
31 1
|
34 |
32 1
|
@@ -38,20 +38,20 @@ index prediction
|
|
38 |
36 1
|
39 |
37 1
|
40 |
38 1
|
41 |
-
39
|
42 |
40 1
|
43 |
41 1
|
44 |
42 1
|
45 |
-
43
|
46 |
-
44
|
47 |
-
45
|
48 |
-
46
|
49 |
47 1
|
50 |
48 1
|
51 |
49 0
|
52 |
50 1
|
53 |
51 1
|
54 |
-
52
|
55 |
53 1
|
56 |
54 1
|
57 |
55 1
|
@@ -63,7 +63,7 @@ index prediction
|
|
63 |
61 1
|
64 |
62 1
|
65 |
63 1
|
66 |
-
64
|
67 |
65 1
|
68 |
66 1
|
69 |
67 1
|
@@ -78,8 +78,8 @@ index prediction
|
|
78 |
76 1
|
79 |
77 0
|
80 |
78 1
|
81 |
-
79
|
82 |
-
80
|
83 |
81 0
|
84 |
82 1
|
85 |
83 1
|
@@ -91,7 +91,7 @@ index prediction
|
|
91 |
89 1
|
92 |
90 1
|
93 |
91 1
|
94 |
-
92
|
95 |
93 1
|
96 |
94 1
|
97 |
95 1
|
@@ -104,7 +104,7 @@ index prediction
|
|
104 |
102 1
|
105 |
103 1
|
106 |
104 1
|
107 |
-
105
|
108 |
106 1
|
109 |
107 1
|
110 |
108 1
|
@@ -112,7 +112,7 @@ index prediction
|
|
112 |
110 1
|
113 |
111 1
|
114 |
112 1
|
115 |
-
113
|
116 |
114 1
|
117 |
115 1
|
118 |
116 1
|
@@ -143,12 +143,12 @@ index prediction
|
|
143 |
141 1
|
144 |
142 1
|
145 |
143 1
|
146 |
-
144
|
147 |
145 1
|
148 |
146 1
|
149 |
147 1
|
150 |
148 1
|
151 |
-
149
|
152 |
150 1
|
153 |
151 1
|
154 |
152 1
|
@@ -163,24 +163,24 @@ index prediction
|
|
163 |
161 1
|
164 |
162 1
|
165 |
163 1
|
166 |
-
164
|
167 |
165 0
|
168 |
166 1
|
169 |
167 1
|
170 |
168 1
|
171 |
-
169
|
172 |
-
170
|
173 |
171 1
|
174 |
-
172
|
175 |
173 0
|
176 |
174 1
|
177 |
175 1
|
178 |
-
176
|
179 |
177 0
|
180 |
178 1
|
181 |
179 1
|
182 |
180 1
|
183 |
-
181
|
184 |
182 1
|
185 |
183 1
|
186 |
184 1
|
@@ -189,7 +189,7 @@ index prediction
|
|
189 |
187 1
|
190 |
188 1
|
191 |
189 1
|
192 |
-
190
|
193 |
191 1
|
194 |
192 1
|
195 |
193 1
|
@@ -197,7 +197,7 @@ index prediction
|
|
197 |
195 1
|
198 |
196 1
|
199 |
197 1
|
200 |
-
198
|
201 |
199 0
|
202 |
200 1
|
203 |
201 1
|
@@ -209,15 +209,15 @@ index prediction
|
|
209 |
207 1
|
210 |
208 1
|
211 |
209 1
|
212 |
-
210
|
213 |
211 1
|
214 |
212 1
|
215 |
213 1
|
216 |
214 0
|
217 |
215 1
|
218 |
216 0
|
219 |
-
217
|
220 |
-
218
|
221 |
219 1
|
222 |
220 0
|
223 |
221 1
|
@@ -227,11 +227,11 @@ index prediction
|
|
227 |
225 1
|
228 |
226 0
|
229 |
227 0
|
230 |
-
228
|
231 |
-
229
|
232 |
-
230
|
233 |
231 1
|
234 |
-
232
|
235 |
233 1
|
236 |
234 1
|
237 |
235 1
|
@@ -245,10 +245,10 @@ index prediction
|
|
245 |
243 1
|
246 |
244 1
|
247 |
245 1
|
248 |
-
246
|
249 |
247 1
|
250 |
-
248
|
251 |
-
249
|
252 |
250 0
|
253 |
251 1
|
254 |
252 1
|
@@ -296,7 +296,7 @@ index prediction
|
|
296 |
294 1
|
297 |
295 1
|
298 |
296 1
|
299 |
-
297
|
300 |
298 0
|
301 |
299 0
|
302 |
300 0
|
@@ -316,7 +316,7 @@ index prediction
|
|
316 |
314 0
|
317 |
315 0
|
318 |
316 0
|
319 |
-
317
|
320 |
318 1
|
321 |
319 0
|
322 |
320 0
|
@@ -327,17 +327,17 @@ index prediction
|
|
327 |
325 0
|
328 |
326 0
|
329 |
327 0
|
330 |
-
328
|
331 |
329 0
|
332 |
330 1
|
333 |
-
331
|
334 |
332 0
|
335 |
333 0
|
336 |
334 0
|
337 |
335 0
|
338 |
336 0
|
339 |
337 0
|
340 |
-
338
|
341 |
339 0
|
342 |
340 0
|
343 |
341 0
|
@@ -350,7 +350,7 @@ index prediction
|
|
350 |
348 0
|
351 |
349 0
|
352 |
350 0
|
353 |
-
351
|
354 |
352 0
|
355 |
353 0
|
356 |
354 0
|
@@ -363,9 +363,9 @@ index prediction
|
|
363 |
361 0
|
364 |
362 0
|
365 |
363 0
|
366 |
-
364
|
367 |
365 0
|
368 |
-
366
|
369 |
367 0
|
370 |
368 0
|
371 |
369 0
|
@@ -375,7 +375,7 @@ index prediction
|
|
375 |
373 0
|
376 |
374 0
|
377 |
375 0
|
378 |
-
376
|
379 |
377 0
|
380 |
378 0
|
381 |
379 0
|
@@ -400,8 +400,8 @@ index prediction
|
|
400 |
398 1
|
401 |
399 0
|
402 |
400 0
|
403 |
-
401
|
404 |
-
402
|
405 |
403 0
|
406 |
404 0
|
407 |
405 0
|
@@ -419,7 +419,7 @@ index prediction
|
|
419 |
417 0
|
420 |
418 0
|
421 |
419 0
|
422 |
-
420
|
423 |
421 0
|
424 |
422 0
|
425 |
423 0
|
@@ -446,12 +446,12 @@ index prediction
|
|
446 |
444 0
|
447 |
445 0
|
448 |
446 0
|
449 |
-
447
|
450 |
448 0
|
451 |
449 0
|
452 |
450 0
|
453 |
451 0
|
454 |
-
452
|
455 |
453 0
|
456 |
454 0
|
457 |
455 0
|
@@ -517,7 +517,7 @@ index prediction
|
|
517 |
515 0
|
518 |
516 0
|
519 |
517 0
|
520 |
-
518
|
521 |
519 0
|
522 |
520 0
|
523 |
521 0
|
@@ -535,13 +535,13 @@ index prediction
|
|
535 |
533 0
|
536 |
534 0
|
537 |
535 0
|
538 |
-
536
|
539 |
537 0
|
540 |
538 0
|
541 |
539 0
|
542 |
540 0
|
543 |
541 0
|
544 |
-
542
|
545 |
543 0
|
546 |
544 0
|
547 |
545 0
|
@@ -587,7 +587,7 @@ index prediction
|
|
587 |
585 0
|
588 |
586 0
|
589 |
587 0
|
590 |
-
588
|
591 |
589 0
|
592 |
590 0
|
593 |
591 0
|
@@ -595,7 +595,7 @@ index prediction
|
|
595 |
593 0
|
596 |
594 0
|
597 |
595 0
|
598 |
-
596
|
599 |
597 0
|
600 |
598 0
|
601 |
599 0
|
@@ -607,10 +607,10 @@ index prediction
|
|
607 |
605 0
|
608 |
606 0
|
609 |
607 0
|
610 |
-
608
|
611 |
609 0
|
612 |
610 1
|
613 |
-
611
|
614 |
612 0
|
615 |
613 0
|
616 |
614 0
|
@@ -631,10 +631,10 @@ index prediction
|
|
631 |
629 0
|
632 |
630 0
|
633 |
631 0
|
634 |
-
632
|
635 |
633 1
|
636 |
634 0
|
637 |
-
635
|
638 |
636 0
|
639 |
637 0
|
640 |
638 0
|
@@ -662,7 +662,7 @@ index prediction
|
|
662 |
660 0
|
663 |
661 0
|
664 |
662 0
|
665 |
-
663
|
666 |
664 0
|
667 |
665 0
|
668 |
666 0
|
@@ -702,7 +702,7 @@ index prediction
|
|
702 |
700 0
|
703 |
701 0
|
704 |
702 0
|
705 |
-
703
|
706 |
704 0
|
707 |
705 0
|
708 |
706 0
|
@@ -751,7 +751,7 @@ index prediction
|
|
751 |
749 0
|
752 |
750 0
|
753 |
751 0
|
754 |
-
752
|
755 |
753 0
|
756 |
754 0
|
757 |
755 0
|
@@ -764,12 +764,12 @@ index prediction
|
|
764 |
762 0
|
765 |
763 0
|
766 |
764 0
|
767 |
-
765
|
768 |
766 0
|
769 |
767 0
|
770 |
768 0
|
771 |
769 0
|
772 |
-
770
|
773 |
771 0
|
774 |
772 0
|
775 |
773 0
|
@@ -794,7 +794,7 @@ index prediction
|
|
794 |
792 0
|
795 |
793 0
|
796 |
794 0
|
797 |
-
795
|
798 |
796 0
|
799 |
797 0
|
800 |
798 0
|
@@ -808,7 +808,7 @@ index prediction
|
|
808 |
806 0
|
809 |
807 0
|
810 |
808 0
|
811 |
-
809
|
812 |
810 0
|
813 |
811 0
|
814 |
812 0
|
@@ -864,10 +864,10 @@ index prediction
|
|
864 |
862 0
|
865 |
863 0
|
866 |
864 0
|
867 |
-
865
|
868 |
866 0
|
869 |
867 0
|
870 |
-
868
|
871 |
869 0
|
872 |
870 0
|
873 |
871 0
|
@@ -883,20 +883,20 @@ index prediction
|
|
883 |
881 0
|
884 |
882 0
|
885 |
883 0
|
886 |
-
884
|
887 |
885 0
|
888 |
886 0
|
889 |
887 0
|
890 |
888 0
|
891 |
889 0
|
892 |
890 0
|
893 |
-
891
|
894 |
892 0
|
895 |
893 0
|
896 |
-
894
|
897 |
895 0
|
898 |
896 0
|
899 |
-
897
|
900 |
898 0
|
901 |
899 0
|
902 |
900 0
|
@@ -906,7 +906,7 @@ index prediction
|
|
906 |
904 0
|
907 |
905 0
|
908 |
906 0
|
909 |
-
907
|
910 |
908 0
|
911 |
909 0
|
912 |
910 0
|
@@ -949,12 +949,12 @@ index prediction
|
|
949 |
947 0
|
950 |
948 0
|
951 |
949 0
|
952 |
-
950
|
953 |
951 0
|
954 |
952 0
|
955 |
953 0
|
956 |
954 0
|
957 |
-
955
|
958 |
956 0
|
959 |
957 0
|
960 |
958 0
|
@@ -962,13 +962,13 @@ index prediction
|
|
962 |
960 0
|
963 |
961 0
|
964 |
962 0
|
965 |
-
963
|
966 |
964 0
|
967 |
965 0
|
968 |
966 0
|
969 |
967 0
|
970 |
968 0
|
971 |
-
969
|
972 |
970 0
|
973 |
971 0
|
974 |
972 0
|
@@ -985,13 +985,13 @@ index prediction
|
|
985 |
983 0
|
986 |
984 0
|
987 |
985 0
|
988 |
-
986
|
989 |
987 0
|
990 |
988 0
|
991 |
989 0
|
992 |
990 0
|
993 |
991 1
|
994 |
-
992
|
995 |
993 0
|
996 |
994 0
|
997 |
995 0
|
|
|
1 |
index prediction
|
2 |
0 1
|
3 |
+
1 1
|
4 |
2 1
|
5 |
3 1
|
6 |
+
4 0
|
7 |
5 1
|
8 |
6 1
|
9 |
7 1
|
10 |
8 0
|
11 |
+
9 1
|
12 |
10 1
|
13 |
11 1
|
14 |
12 1
|
15 |
13 1
|
16 |
14 1
|
17 |
+
15 0
|
18 |
16 1
|
19 |
17 1
|
20 |
18 1
|
|
|
23 |
21 1
|
24 |
22 1
|
25 |
23 1
|
26 |
+
24 0
|
27 |
+
25 1
|
28 |
26 1
|
29 |
+
27 1
|
30 |
28 1
|
31 |
+
29 1
|
32 |
30 1
|
33 |
31 1
|
34 |
32 1
|
|
|
38 |
36 1
|
39 |
37 1
|
40 |
38 1
|
41 |
+
39 1
|
42 |
40 1
|
43 |
41 1
|
44 |
42 1
|
45 |
+
43 1
|
46 |
+
44 1
|
47 |
+
45 1
|
48 |
+
46 1
|
49 |
47 1
|
50 |
48 1
|
51 |
49 0
|
52 |
50 1
|
53 |
51 1
|
54 |
+
52 1
|
55 |
53 1
|
56 |
54 1
|
57 |
55 1
|
|
|
63 |
61 1
|
64 |
62 1
|
65 |
63 1
|
66 |
+
64 1
|
67 |
65 1
|
68 |
66 1
|
69 |
67 1
|
|
|
78 |
76 1
|
79 |
77 0
|
80 |
78 1
|
81 |
+
79 1
|
82 |
+
80 1
|
83 |
81 0
|
84 |
82 1
|
85 |
83 1
|
|
|
91 |
89 1
|
92 |
90 1
|
93 |
91 1
|
94 |
+
92 1
|
95 |
93 1
|
96 |
94 1
|
97 |
95 1
|
|
|
104 |
102 1
|
105 |
103 1
|
106 |
104 1
|
107 |
+
105 1
|
108 |
106 1
|
109 |
107 1
|
110 |
108 1
|
|
|
112 |
110 1
|
113 |
111 1
|
114 |
112 1
|
115 |
+
113 0
|
116 |
114 1
|
117 |
115 1
|
118 |
116 1
|
|
|
143 |
141 1
|
144 |
142 1
|
145 |
143 1
|
146 |
+
144 1
|
147 |
145 1
|
148 |
146 1
|
149 |
147 1
|
150 |
148 1
|
151 |
+
149 1
|
152 |
150 1
|
153 |
151 1
|
154 |
152 1
|
|
|
163 |
161 1
|
164 |
162 1
|
165 |
163 1
|
166 |
+
164 1
|
167 |
165 0
|
168 |
166 1
|
169 |
167 1
|
170 |
168 1
|
171 |
+
169 1
|
172 |
+
170 1
|
173 |
171 1
|
174 |
+
172 1
|
175 |
173 0
|
176 |
174 1
|
177 |
175 1
|
178 |
+
176 1
|
179 |
177 0
|
180 |
178 1
|
181 |
179 1
|
182 |
180 1
|
183 |
+
181 0
|
184 |
182 1
|
185 |
183 1
|
186 |
184 1
|
|
|
189 |
187 1
|
190 |
188 1
|
191 |
189 1
|
192 |
+
190 1
|
193 |
191 1
|
194 |
192 1
|
195 |
193 1
|
|
|
197 |
195 1
|
198 |
196 1
|
199 |
197 1
|
200 |
+
198 1
|
201 |
199 0
|
202 |
200 1
|
203 |
201 1
|
|
|
209 |
207 1
|
210 |
208 1
|
211 |
209 1
|
212 |
+
210 1
|
213 |
211 1
|
214 |
212 1
|
215 |
213 1
|
216 |
214 0
|
217 |
215 1
|
218 |
216 0
|
219 |
+
217 0
|
220 |
+
218 1
|
221 |
219 1
|
222 |
220 0
|
223 |
221 1
|
|
|
227 |
225 1
|
228 |
226 0
|
229 |
227 0
|
230 |
+
228 1
|
231 |
+
229 1
|
232 |
+
230 1
|
233 |
231 1
|
234 |
+
232 1
|
235 |
233 1
|
236 |
234 1
|
237 |
235 1
|
|
|
245 |
243 1
|
246 |
244 1
|
247 |
245 1
|
248 |
+
246 0
|
249 |
247 1
|
250 |
+
248 1
|
251 |
+
249 0
|
252 |
250 0
|
253 |
251 1
|
254 |
252 1
|
|
|
296 |
294 1
|
297 |
295 1
|
298 |
296 1
|
299 |
+
297 0
|
300 |
298 0
|
301 |
299 0
|
302 |
300 0
|
|
|
316 |
314 0
|
317 |
315 0
|
318 |
316 0
|
319 |
+
317 0
|
320 |
318 1
|
321 |
319 0
|
322 |
320 0
|
|
|
327 |
325 0
|
328 |
326 0
|
329 |
327 0
|
330 |
+
328 1
|
331 |
329 0
|
332 |
330 1
|
333 |
+
331 1
|
334 |
332 0
|
335 |
333 0
|
336 |
334 0
|
337 |
335 0
|
338 |
336 0
|
339 |
337 0
|
340 |
+
338 1
|
341 |
339 0
|
342 |
340 0
|
343 |
341 0
|
|
|
350 |
348 0
|
351 |
349 0
|
352 |
350 0
|
353 |
+
351 0
|
354 |
352 0
|
355 |
353 0
|
356 |
354 0
|
|
|
363 |
361 0
|
364 |
362 0
|
365 |
363 0
|
366 |
+
364 1
|
367 |
365 0
|
368 |
+
366 1
|
369 |
367 0
|
370 |
368 0
|
371 |
369 0
|
|
|
375 |
373 0
|
376 |
374 0
|
377 |
375 0
|
378 |
+
376 0
|
379 |
377 0
|
380 |
378 0
|
381 |
379 0
|
|
|
400 |
398 1
|
401 |
399 0
|
402 |
400 0
|
403 |
+
401 0
|
404 |
+
402 1
|
405 |
403 0
|
406 |
404 0
|
407 |
405 0
|
|
|
419 |
417 0
|
420 |
418 0
|
421 |
419 0
|
422 |
+
420 1
|
423 |
421 0
|
424 |
422 0
|
425 |
423 0
|
|
|
446 |
444 0
|
447 |
445 0
|
448 |
446 0
|
449 |
+
447 1
|
450 |
448 0
|
451 |
449 0
|
452 |
450 0
|
453 |
451 0
|
454 |
+
452 0
|
455 |
453 0
|
456 |
454 0
|
457 |
455 0
|
|
|
517 |
515 0
|
518 |
516 0
|
519 |
517 0
|
520 |
+
518 0
|
521 |
519 0
|
522 |
520 0
|
523 |
521 0
|
|
|
535 |
533 0
|
536 |
534 0
|
537 |
535 0
|
538 |
+
536 1
|
539 |
537 0
|
540 |
538 0
|
541 |
539 0
|
542 |
540 0
|
543 |
541 0
|
544 |
+
542 0
|
545 |
543 0
|
546 |
544 0
|
547 |
545 0
|
|
|
587 |
585 0
|
588 |
586 0
|
589 |
587 0
|
590 |
+
588 0
|
591 |
589 0
|
592 |
590 0
|
593 |
591 0
|
|
|
595 |
593 0
|
596 |
594 0
|
597 |
595 0
|
598 |
+
596 0
|
599 |
597 0
|
600 |
598 0
|
601 |
599 0
|
|
|
607 |
605 0
|
608 |
606 0
|
609 |
607 0
|
610 |
+
608 1
|
611 |
609 0
|
612 |
610 1
|
613 |
+
611 0
|
614 |
612 0
|
615 |
613 0
|
616 |
614 0
|
|
|
631 |
629 0
|
632 |
630 0
|
633 |
631 0
|
634 |
+
632 1
|
635 |
633 1
|
636 |
634 0
|
637 |
+
635 0
|
638 |
636 0
|
639 |
637 0
|
640 |
638 0
|
|
|
662 |
660 0
|
663 |
661 0
|
664 |
662 0
|
665 |
+
663 1
|
666 |
664 0
|
667 |
665 0
|
668 |
666 0
|
|
|
702 |
700 0
|
703 |
701 0
|
704 |
702 0
|
705 |
+
703 1
|
706 |
704 0
|
707 |
705 0
|
708 |
706 0
|
|
|
751 |
749 0
|
752 |
750 0
|
753 |
751 0
|
754 |
+
752 1
|
755 |
753 0
|
756 |
754 0
|
757 |
755 0
|
|
|
764 |
762 0
|
765 |
763 0
|
766 |
764 0
|
767 |
+
765 0
|
768 |
766 0
|
769 |
767 0
|
770 |
768 0
|
771 |
769 0
|
772 |
+
770 1
|
773 |
771 0
|
774 |
772 0
|
775 |
773 0
|
|
|
794 |
792 0
|
795 |
793 0
|
796 |
794 0
|
797 |
+
795 1
|
798 |
796 0
|
799 |
797 0
|
800 |
798 0
|
|
|
808 |
806 0
|
809 |
807 0
|
810 |
808 0
|
811 |
+
809 0
|
812 |
810 0
|
813 |
811 0
|
814 |
812 0
|
|
|
864 |
862 0
|
865 |
863 0
|
866 |
864 0
|
867 |
+
865 0
|
868 |
866 0
|
869 |
867 0
|
870 |
+
868 1
|
871 |
869 0
|
872 |
870 0
|
873 |
871 0
|
|
|
883 |
881 0
|
884 |
882 0
|
885 |
883 0
|
886 |
+
884 0
|
887 |
885 0
|
888 |
886 0
|
889 |
887 0
|
890 |
888 0
|
891 |
889 0
|
892 |
890 0
|
893 |
+
891 1
|
894 |
892 0
|
895 |
893 0
|
896 |
+
894 0
|
897 |
895 0
|
898 |
896 0
|
899 |
+
897 0
|
900 |
898 0
|
901 |
899 0
|
902 |
900 0
|
|
|
906 |
904 0
|
907 |
905 0
|
908 |
906 0
|
909 |
+
907 1
|
910 |
908 0
|
911 |
909 0
|
912 |
910 0
|
|
|
949 |
947 0
|
950 |
948 0
|
951 |
949 0
|
952 |
+
950 0
|
953 |
951 0
|
954 |
952 0
|
955 |
953 0
|
956 |
954 0
|
957 |
+
955 1
|
958 |
956 0
|
959 |
957 0
|
960 |
958 0
|
|
|
962 |
960 0
|
963 |
961 0
|
964 |
962 0
|
965 |
+
963 1
|
966 |
964 0
|
967 |
965 0
|
968 |
966 0
|
969 |
967 0
|
970 |
968 0
|
971 |
+
969 0
|
972 |
970 0
|
973 |
971 0
|
974 |
972 0
|
|
|
985 |
983 0
|
986 |
984 0
|
987 |
985 0
|
988 |
+
986 1
|
989 |
987 0
|
990 |
988 0
|
991 |
989 0
|
992 |
990 0
|
993 |
991 1
|
994 |
+
992 0
|
995 |
993 0
|
996 |
994 0
|
997 |
995 0
|
runs/May14_19-19-36_indolem-petl-vm/events.out.tfevents.1715717191.indolem-petl-vm.306885.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da2f2a468694ab3e44b6d4566861949e44aa1ddbbce5c0833bc08516b0b806de
|
3 |
+
size 560
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 3638,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second":
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 20.0,
|
3 |
+
"train_loss": 0.06173487283655855,
|
4 |
+
"train_runtime": 2765.1299,
|
5 |
"train_samples": 3638,
|
6 |
+
"train_samples_per_second": 26.313,
|
7 |
+
"train_steps_per_second": 0.882
|
8 |
}
|
trainer_state.json
CHANGED
@@ -1,48 +1,409 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"loss": 0.
|
16 |
"step": 122
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"eval_f1": 0.
|
22 |
-
"eval_loss": 0.
|
23 |
-
"eval_precision": 0.
|
24 |
-
"eval_recall": 0.
|
25 |
-
"eval_runtime": 4.
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_steps_per_second": 10.
|
28 |
"step": 122
|
29 |
},
|
30 |
{
|
31 |
-
"epoch":
|
32 |
-
"
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
}
|
39 |
],
|
40 |
"logging_steps": 500,
|
41 |
-
"max_steps":
|
42 |
"num_input_tokens_seen": 0,
|
43 |
-
"num_train_epochs":
|
44 |
"save_steps": 500,
|
45 |
-
"total_flos":
|
46 |
"train_batch_size": 30,
|
47 |
"trial_name": null,
|
48 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 20.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2440,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"grad_norm": 4.053804397583008,
|
14 |
+
"learning_rate": 4.75e-05,
|
15 |
+
"loss": 0.4355,
|
16 |
"step": 122
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.8696741854636592,
|
21 |
+
"eval_f1": 0.835906358747232,
|
22 |
+
"eval_loss": 0.3243214786052704,
|
23 |
+
"eval_precision": 0.853844109243139,
|
24 |
+
"eval_recall": 0.8227859610838335,
|
25 |
+
"eval_runtime": 4.9832,
|
26 |
+
"eval_samples_per_second": 80.069,
|
27 |
+
"eval_steps_per_second": 10.034,
|
28 |
"step": 122
|
29 |
},
|
30 |
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"grad_norm": 23.150257110595703,
|
33 |
+
"learning_rate": 4.5e-05,
|
34 |
+
"loss": 0.2295,
|
35 |
+
"step": 244
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"epoch": 2.0,
|
39 |
+
"eval_accuracy": 0.8897243107769424,
|
40 |
+
"eval_f1": 0.8701248742380304,
|
41 |
+
"eval_loss": 0.3046626150608063,
|
42 |
+
"eval_precision": 0.8624507874015748,
|
43 |
+
"eval_recall": 0.8794780869248955,
|
44 |
+
"eval_runtime": 4.975,
|
45 |
+
"eval_samples_per_second": 80.2,
|
46 |
+
"eval_steps_per_second": 10.05,
|
47 |
+
"step": 244
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 3.0,
|
51 |
+
"grad_norm": 0.44390636682510376,
|
52 |
+
"learning_rate": 4.25e-05,
|
53 |
+
"loss": 0.1337,
|
54 |
+
"step": 366
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 3.0,
|
58 |
+
"eval_accuracy": 0.899749373433584,
|
59 |
+
"eval_f1": 0.879667048676036,
|
60 |
+
"eval_loss": 0.3747338354587555,
|
61 |
+
"eval_precision": 0.8778361344537815,
|
62 |
+
"eval_recall": 0.8815693762502272,
|
63 |
+
"eval_runtime": 4.9774,
|
64 |
+
"eval_samples_per_second": 80.162,
|
65 |
+
"eval_steps_per_second": 10.045,
|
66 |
+
"step": 366
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"epoch": 4.0,
|
70 |
+
"grad_norm": 15.949886322021484,
|
71 |
+
"learning_rate": 4e-05,
|
72 |
+
"loss": 0.1038,
|
73 |
+
"step": 488
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"epoch": 4.0,
|
77 |
+
"eval_accuracy": 0.8822055137844611,
|
78 |
+
"eval_f1": 0.8651222336500356,
|
79 |
+
"eval_loss": 0.41882890462875366,
|
80 |
+
"eval_precision": 0.8518339768339769,
|
81 |
+
"eval_recall": 0.8866612111292962,
|
82 |
+
"eval_runtime": 5.0099,
|
83 |
+
"eval_samples_per_second": 79.643,
|
84 |
+
"eval_steps_per_second": 9.98,
|
85 |
+
"step": 488
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 5.0,
|
89 |
+
"grad_norm": 3.7783217430114746,
|
90 |
+
"learning_rate": 3.7500000000000003e-05,
|
91 |
+
"loss": 0.072,
|
92 |
+
"step": 610
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"epoch": 5.0,
|
96 |
+
"eval_accuracy": 0.8872180451127819,
|
97 |
+
"eval_f1": 0.8622036668943447,
|
98 |
+
"eval_loss": 0.6270534992218018,
|
99 |
+
"eval_precision": 0.8671602787456446,
|
100 |
+
"eval_recall": 0.8577014002545917,
|
101 |
+
"eval_runtime": 5.0204,
|
102 |
+
"eval_samples_per_second": 79.476,
|
103 |
+
"eval_steps_per_second": 9.959,
|
104 |
+
"step": 610
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 6.0,
|
108 |
+
"grad_norm": 44.57243347167969,
|
109 |
+
"learning_rate": 3.5e-05,
|
110 |
+
"loss": 0.0462,
|
111 |
+
"step": 732
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"epoch": 6.0,
|
115 |
+
"eval_accuracy": 0.8897243107769424,
|
116 |
+
"eval_f1": 0.8695225637671682,
|
117 |
+
"eval_loss": 0.6129250526428223,
|
118 |
+
"eval_precision": 0.8631532846715328,
|
119 |
+
"eval_recall": 0.8769776322967813,
|
120 |
+
"eval_runtime": 4.9643,
|
121 |
+
"eval_samples_per_second": 80.375,
|
122 |
+
"eval_steps_per_second": 10.072,
|
123 |
+
"step": 732
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"epoch": 7.0,
|
127 |
+
"grad_norm": 0.024074144661426544,
|
128 |
+
"learning_rate": 3.2500000000000004e-05,
|
129 |
+
"loss": 0.0459,
|
130 |
+
"step": 854
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"epoch": 7.0,
|
134 |
+
"eval_accuracy": 0.8897243107769424,
|
135 |
+
"eval_f1": 0.8649122807017544,
|
136 |
+
"eval_loss": 0.5890637636184692,
|
137 |
+
"eval_precision": 0.8710116366366366,
|
138 |
+
"eval_recall": 0.8594744498999818,
|
139 |
+
"eval_runtime": 4.9832,
|
140 |
+
"eval_samples_per_second": 80.069,
|
141 |
+
"eval_steps_per_second": 10.034,
|
142 |
+
"step": 854
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 8.0,
|
146 |
+
"grad_norm": 0.022918157279491425,
|
147 |
+
"learning_rate": 3e-05,
|
148 |
+
"loss": 0.0391,
|
149 |
+
"step": 976
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 8.0,
|
153 |
+
"eval_accuracy": 0.8872180451127819,
|
154 |
+
"eval_f1": 0.8680720368560659,
|
155 |
+
"eval_loss": 0.5972921252250671,
|
156 |
+
"eval_precision": 0.8587217615098657,
|
157 |
+
"eval_recall": 0.8802054919076197,
|
158 |
+
"eval_runtime": 5.0003,
|
159 |
+
"eval_samples_per_second": 79.795,
|
160 |
+
"eval_steps_per_second": 9.999,
|
161 |
+
"step": 976
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 9.0,
|
165 |
+
"grad_norm": 106.23094177246094,
|
166 |
+
"learning_rate": 2.7500000000000004e-05,
|
167 |
+
"loss": 0.0307,
|
168 |
+
"step": 1098
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"epoch": 9.0,
|
172 |
+
"eval_accuracy": 0.87468671679198,
|
173 |
+
"eval_f1": 0.8584865509022812,
|
174 |
+
"eval_loss": 0.7086873054504395,
|
175 |
+
"eval_precision": 0.8441043083900227,
|
176 |
+
"eval_recall": 0.8863429714493545,
|
177 |
+
"eval_runtime": 4.9859,
|
178 |
+
"eval_samples_per_second": 80.026,
|
179 |
+
"eval_steps_per_second": 10.028,
|
180 |
+
"step": 1098
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"epoch": 10.0,
|
184 |
+
"grad_norm": 0.024997469037771225,
|
185 |
+
"learning_rate": 2.5e-05,
|
186 |
+
"loss": 0.0199,
|
187 |
+
"step": 1220
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"epoch": 10.0,
|
191 |
+
"eval_accuracy": 0.8972431077694235,
|
192 |
+
"eval_f1": 0.8717112228173498,
|
193 |
+
"eval_loss": 0.7264124155044556,
|
194 |
+
"eval_precision": 0.8869295958279009,
|
195 |
+
"eval_recall": 0.8597926895799237,
|
196 |
+
"eval_runtime": 4.9651,
|
197 |
+
"eval_samples_per_second": 80.361,
|
198 |
+
"eval_steps_per_second": 10.07,
|
199 |
+
"step": 1220
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"epoch": 11.0,
|
203 |
+
"grad_norm": 0.004392046481370926,
|
204 |
+
"learning_rate": 2.25e-05,
|
205 |
+
"loss": 0.0105,
|
206 |
+
"step": 1342
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 11.0,
|
210 |
+
"eval_accuracy": 0.8972431077694235,
|
211 |
+
"eval_f1": 0.8757339815412664,
|
212 |
+
"eval_loss": 0.6738360524177551,
|
213 |
+
"eval_precision": 0.8766906299500427,
|
214 |
+
"eval_recall": 0.8747954173486088,
|
215 |
+
"eval_runtime": 5.0179,
|
216 |
+
"eval_samples_per_second": 79.516,
|
217 |
+
"eval_steps_per_second": 9.964,
|
218 |
+
"step": 1342
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"epoch": 12.0,
|
222 |
+
"grad_norm": 0.004026818089187145,
|
223 |
+
"learning_rate": 2e-05,
|
224 |
+
"loss": 0.0131,
|
225 |
+
"step": 1464
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"epoch": 12.0,
|
229 |
+
"eval_accuracy": 0.899749373433584,
|
230 |
+
"eval_f1": 0.882467302933899,
|
231 |
+
"eval_loss": 0.7488105297088623,
|
232 |
+
"eval_precision": 0.8732988802756245,
|
233 |
+
"eval_recall": 0.8940716493907983,
|
234 |
+
"eval_runtime": 5.0007,
|
235 |
+
"eval_samples_per_second": 79.788,
|
236 |
+
"eval_steps_per_second": 9.999,
|
237 |
+
"step": 1464
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"epoch": 13.0,
|
241 |
+
"grad_norm": 0.004543425515294075,
|
242 |
+
"learning_rate": 1.75e-05,
|
243 |
+
"loss": 0.0102,
|
244 |
+
"step": 1586
|
245 |
+
},
|
246 |
+
{
|
247 |
+
"epoch": 13.0,
|
248 |
+
"eval_accuracy": 0.8972431077694235,
|
249 |
+
"eval_f1": 0.8792560061999484,
|
250 |
+
"eval_loss": 0.7154756784439087,
|
251 |
+
"eval_precision": 0.8707622232472325,
|
252 |
+
"eval_recall": 0.889798145117294,
|
253 |
+
"eval_runtime": 5.0136,
|
254 |
+
"eval_samples_per_second": 79.584,
|
255 |
+
"eval_steps_per_second": 9.973,
|
256 |
+
"step": 1586
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"epoch": 14.0,
|
260 |
+
"grad_norm": 0.0037931231781840324,
|
261 |
+
"learning_rate": 1.5e-05,
|
262 |
+
"loss": 0.0061,
|
263 |
+
"step": 1708
|
264 |
+
},
|
265 |
+
{
|
266 |
+
"epoch": 14.0,
|
267 |
+
"eval_accuracy": 0.9072681704260651,
|
268 |
+
"eval_f1": 0.8894993300948346,
|
269 |
+
"eval_loss": 0.7196279168128967,
|
270 |
+
"eval_precision": 0.8850535598035154,
|
271 |
+
"eval_recall": 0.8943898890707401,
|
272 |
+
"eval_runtime": 5.009,
|
273 |
+
"eval_samples_per_second": 79.657,
|
274 |
+
"eval_steps_per_second": 9.982,
|
275 |
+
"step": 1708
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 15.0,
|
279 |
+
"grad_norm": 0.0027608012314885855,
|
280 |
+
"learning_rate": 1.25e-05,
|
281 |
+
"loss": 0.0138,
|
282 |
+
"step": 1830
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 15.0,
|
286 |
+
"eval_accuracy": 0.9022556390977443,
|
287 |
+
"eval_f1": 0.884617951284618,
|
288 |
+
"eval_loss": 0.7618029713630676,
|
289 |
+
"eval_precision": 0.8772893772893773,
|
290 |
+
"eval_recall": 0.8933442444080741,
|
291 |
+
"eval_runtime": 5.0251,
|
292 |
+
"eval_samples_per_second": 79.401,
|
293 |
+
"eval_steps_per_second": 9.95,
|
294 |
+
"step": 1830
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"epoch": 16.0,
|
298 |
+
"grad_norm": 0.0022813216783106327,
|
299 |
+
"learning_rate": 1e-05,
|
300 |
+
"loss": 0.0075,
|
301 |
+
"step": 1952
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"epoch": 16.0,
|
305 |
+
"eval_accuracy": 0.9047619047619048,
|
306 |
+
"eval_f1": 0.8873149414352814,
|
307 |
+
"eval_loss": 0.7252941727638245,
|
308 |
+
"eval_precision": 0.8806277372262774,
|
309 |
+
"eval_recall": 0.8951172940534643,
|
310 |
+
"eval_runtime": 5.0138,
|
311 |
+
"eval_samples_per_second": 79.581,
|
312 |
+
"eval_steps_per_second": 9.973,
|
313 |
+
"step": 1952
|
314 |
+
},
|
315 |
+
{
|
316 |
+
"epoch": 17.0,
|
317 |
+
"grad_norm": 15.638340950012207,
|
318 |
+
"learning_rate": 7.5e-06,
|
319 |
+
"loss": 0.0063,
|
320 |
+
"step": 2074
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"epoch": 17.0,
|
324 |
+
"eval_accuracy": 0.9022556390977443,
|
325 |
+
"eval_f1": 0.8840781602687784,
|
326 |
+
"eval_loss": 0.7560042142868042,
|
327 |
+
"eval_precision": 0.87816715542522,
|
328 |
+
"eval_recall": 0.89084378977996,
|
329 |
+
"eval_runtime": 5.0052,
|
330 |
+
"eval_samples_per_second": 79.718,
|
331 |
+
"eval_steps_per_second": 9.99,
|
332 |
+
"step": 2074
|
333 |
+
},
|
334 |
+
{
|
335 |
+
"epoch": 18.0,
|
336 |
+
"grad_norm": 0.002121408935636282,
|
337 |
+
"learning_rate": 5e-06,
|
338 |
+
"loss": 0.0066,
|
339 |
+
"step": 2196
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 18.0,
|
343 |
+
"eval_accuracy": 0.9022556390977443,
|
344 |
+
"eval_f1": 0.8856624319419237,
|
345 |
+
"eval_loss": 0.748332142829895,
|
346 |
+
"eval_precision": 0.8758364312267658,
|
347 |
+
"eval_recall": 0.8983451536643026,
|
348 |
+
"eval_runtime": 4.9788,
|
349 |
+
"eval_samples_per_second": 80.139,
|
350 |
+
"eval_steps_per_second": 10.043,
|
351 |
+
"step": 2196
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"epoch": 19.0,
|
355 |
+
"grad_norm": 0.004570267163217068,
|
356 |
+
"learning_rate": 2.5e-06,
|
357 |
+
"loss": 0.0023,
|
358 |
+
"step": 2318
|
359 |
+
},
|
360 |
+
{
|
361 |
+
"epoch": 19.0,
|
362 |
+
"eval_accuracy": 0.9022556390977443,
|
363 |
+
"eval_f1": 0.884617951284618,
|
364 |
+
"eval_loss": 0.7535205483436584,
|
365 |
+
"eval_precision": 0.8772893772893773,
|
366 |
+
"eval_recall": 0.8933442444080741,
|
367 |
+
"eval_runtime": 5.0013,
|
368 |
+
"eval_samples_per_second": 79.779,
|
369 |
+
"eval_steps_per_second": 9.997,
|
370 |
+
"step": 2318
|
371 |
+
},
|
372 |
+
{
|
373 |
+
"epoch": 20.0,
|
374 |
+
"grad_norm": 0.0018295175395905972,
|
375 |
+
"learning_rate": 0.0,
|
376 |
+
"loss": 0.0021,
|
377 |
+
"step": 2440
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"epoch": 20.0,
|
381 |
+
"eval_accuracy": 0.9047619047619048,
|
382 |
+
"eval_f1": 0.8878351186601172,
|
383 |
+
"eval_loss": 0.7535876035690308,
|
384 |
+
"eval_precision": 0.879776516905975,
|
385 |
+
"eval_recall": 0.8976177486815784,
|
386 |
+
"eval_runtime": 5.0187,
|
387 |
+
"eval_samples_per_second": 79.503,
|
388 |
+
"eval_steps_per_second": 9.963,
|
389 |
+
"step": 2440
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"epoch": 20.0,
|
393 |
+
"step": 2440,
|
394 |
+
"total_flos": 7584162436176000.0,
|
395 |
+
"train_loss": 0.06173487283655855,
|
396 |
+
"train_runtime": 2765.1299,
|
397 |
+
"train_samples_per_second": 26.313,
|
398 |
+
"train_steps_per_second": 0.882
|
399 |
}
|
400 |
],
|
401 |
"logging_steps": 500,
|
402 |
+
"max_steps": 2440,
|
403 |
"num_input_tokens_seen": 0,
|
404 |
+
"num_train_epochs": 20,
|
405 |
"save_steps": 500,
|
406 |
+
"total_flos": 7584162436176000.0,
|
407 |
"train_batch_size": 30,
|
408 |
"trial_name": null,
|
409 |
"trial_params": null
|