SonishMaharjan committed on
Commit
738599a
1 Parent(s): b1069a7

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +4 -4
  2. train_results.json +4 -4
  3. trainer_state.json +56 -56
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.94,
3
  "total_flos": 1.1265748270391624e+18,
4
- "train_loss": 0.6647836283633584,
5
- "train_runtime": 6473.0815,
6
- "train_samples_per_second": 2.286,
7
- "train_steps_per_second": 0.018
8
  }
 
1
  {
2
  "epoch": 2.94,
3
  "total_flos": 1.1265748270391624e+18,
4
+ "train_loss": 0.2613685036960401,
5
+ "train_runtime": 863.1868,
6
+ "train_samples_per_second": 17.141,
7
+ "train_steps_per_second": 0.132
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.94,
3
  "total_flos": 1.1265748270391624e+18,
4
- "train_loss": 0.6647836283633584,
5
- "train_runtime": 6473.0815,
6
- "train_samples_per_second": 2.286,
7
- "train_steps_per_second": 0.018
8
  }
 
1
  {
2
  "epoch": 2.94,
3
  "total_flos": 1.1265748270391624e+18,
4
+ "train_loss": 0.2613685036960401,
5
+ "train_runtime": 863.1868,
6
+ "train_samples_per_second": 17.141,
7
+ "train_steps_per_second": 0.132
8
  }
trainer_state.json CHANGED
@@ -11,125 +11,125 @@
11
  {
12
  "epoch": 0.03,
13
  "learning_rate": 4.166666666666667e-06,
14
- "loss": 1.3431,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.31,
19
  "learning_rate": 5e-05,
20
- "loss": 1.226,
21
  "step": 12
22
  },
23
  {
24
  "epoch": 0.62,
25
  "learning_rate": 4.411764705882353e-05,
26
- "loss": 0.9888,
27
  "step": 24
28
  },
29
  {
30
  "epoch": 0.93,
31
  "learning_rate": 3.8235294117647055e-05,
32
- "loss": 0.8637,
33
  "step": 36
34
  },
35
  {
36
  "epoch": 0.98,
37
- "eval_Macro F1": 0.5661973571975384,
38
- "eval_Macro Precision": 0.7510967291822963,
39
- "eval_Macro Recall": 0.6473652579844594,
40
- "eval_Micro F1": 0.6778127458693942,
41
- "eval_Micro Precision": 0.6778127458693942,
42
- "eval_Micro Recall": 0.6778127458693942,
43
- "eval_Weighted F1": 0.5949172262865288,
44
- "eval_Weighted Precision": 0.7505686242573377,
45
- "eval_Weighted Recall": 0.6778127458693942,
46
- "eval_accuracy": 0.6778127458693942,
47
- "eval_loss": 0.598620593547821,
48
- "eval_runtime": 1926.4978,
49
- "eval_samples_per_second": 1.319,
50
- "eval_steps_per_second": 0.042,
51
  "step": 38
52
  },
53
  {
54
  "epoch": 1.24,
55
  "learning_rate": 3.235294117647059e-05,
56
- "loss": 0.6928,
57
  "step": 48
58
  },
59
  {
60
  "epoch": 1.55,
61
  "learning_rate": 2.647058823529412e-05,
62
- "loss": 0.6054,
63
  "step": 60
64
  },
65
  {
66
  "epoch": 1.86,
67
  "learning_rate": 2.058823529411765e-05,
68
- "loss": 0.5018,
69
  "step": 72
70
  },
71
  {
72
  "epoch": 1.99,
73
- "eval_Macro F1": 0.8376972138667833,
74
- "eval_Macro Precision": 0.8622787078319065,
75
- "eval_Macro Recall": 0.8344563153022619,
76
- "eval_Micro F1": 0.8516915814319432,
77
- "eval_Micro Precision": 0.8516915814319433,
78
- "eval_Micro Recall": 0.8516915814319433,
79
- "eval_Weighted F1": 0.8452584084591334,
80
- "eval_Weighted Precision": 0.8588424441061323,
81
- "eval_Weighted Recall": 0.8516915814319433,
82
- "eval_accuracy": 0.8516915814319433,
83
- "eval_loss": 0.270693838596344,
84
- "eval_runtime": 63.8305,
85
- "eval_samples_per_second": 39.824,
86
- "eval_steps_per_second": 1.253,
87
  "step": 77
88
  },
89
  {
90
  "epoch": 2.17,
91
  "learning_rate": 1.4705882352941177e-05,
92
- "loss": 0.4449,
93
  "step": 84
94
  },
95
  {
96
  "epoch": 2.48,
97
  "learning_rate": 8.823529411764707e-06,
98
- "loss": 0.4168,
99
  "step": 96
100
  },
101
  {
102
  "epoch": 2.79,
103
  "learning_rate": 2.9411764705882355e-06,
104
- "loss": 0.3761,
105
  "step": 108
106
  },
107
  {
108
  "epoch": 2.94,
109
- "eval_Macro F1": 0.8640166757623045,
110
- "eval_Macro Precision": 0.8744992381555277,
111
- "eval_Macro Recall": 0.8626385930557097,
112
- "eval_Micro F1": 0.8741148701809599,
113
- "eval_Micro Precision": 0.8741148701809599,
114
- "eval_Micro Recall": 0.8741148701809599,
115
- "eval_Weighted F1": 0.8719933502245091,
116
- "eval_Weighted Precision": 0.8780283114822369,
117
- "eval_Weighted Recall": 0.8741148701809599,
118
- "eval_accuracy": 0.8741148701809599,
119
- "eval_loss": 0.22845804691314697,
120
- "eval_runtime": 63.2765,
121
- "eval_samples_per_second": 40.173,
122
- "eval_steps_per_second": 1.264,
123
  "step": 114
124
  },
125
  {
126
  "epoch": 2.94,
127
  "step": 114,
128
  "total_flos": 1.1265748270391624e+18,
129
- "train_loss": 0.6647836283633584,
130
- "train_runtime": 6473.0815,
131
- "train_samples_per_second": 2.286,
132
- "train_steps_per_second": 0.018
133
  }
134
  ],
135
  "logging_steps": 12,
 
11
  {
12
  "epoch": 0.03,
13
  "learning_rate": 4.166666666666667e-06,
14
+ "loss": 0.3991,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.31,
19
  "learning_rate": 5e-05,
20
+ "loss": 0.3808,
21
  "step": 12
22
  },
23
  {
24
  "epoch": 0.62,
25
  "learning_rate": 4.411764705882353e-05,
26
+ "loss": 0.3474,
27
  "step": 24
28
  },
29
  {
30
  "epoch": 0.93,
31
  "learning_rate": 3.8235294117647055e-05,
32
+ "loss": 0.3093,
33
  "step": 36
34
  },
35
  {
36
  "epoch": 0.98,
37
+ "eval_Macro F1": 0.8820015735129666,
38
+ "eval_Macro Precision": 0.899385909210207,
39
+ "eval_Macro Recall": 0.8738252833106378,
40
+ "eval_Micro F1": 0.8890637293469709,
41
+ "eval_Micro Precision": 0.8890637293469709,
42
+ "eval_Micro Recall": 0.8890637293469709,
43
+ "eval_Weighted F1": 0.8879137250311478,
44
+ "eval_Weighted Precision": 0.8952120340834557,
45
+ "eval_Weighted Recall": 0.8890637293469709,
46
+ "eval_accuracy": 0.8890637293469709,
47
+ "eval_loss": 0.2252058982849121,
48
+ "eval_runtime": 64.7982,
49
+ "eval_samples_per_second": 39.229,
50
+ "eval_steps_per_second": 1.235,
51
  "step": 38
52
  },
53
  {
54
  "epoch": 1.24,
55
  "learning_rate": 3.235294117647059e-05,
56
+ "loss": 0.2358,
57
  "step": 48
58
  },
59
  {
60
  "epoch": 1.55,
61
  "learning_rate": 2.647058823529412e-05,
62
+ "loss": 0.2391,
63
  "step": 60
64
  },
65
  {
66
  "epoch": 1.86,
67
  "learning_rate": 2.058823529411765e-05,
68
+ "loss": 0.2278,
69
  "step": 72
70
  },
71
  {
72
  "epoch": 1.99,
73
+ "eval_Macro F1": 0.9220395057673447,
74
+ "eval_Macro Precision": 0.9240847889405631,
75
+ "eval_Macro Recall": 0.9220981981645411,
76
+ "eval_Micro F1": 0.9291896144767899,
77
+ "eval_Micro Precision": 0.9291896144767899,
78
+ "eval_Micro Recall": 0.9291896144767899,
79
+ "eval_Weighted F1": 0.929177891947533,
80
+ "eval_Weighted Precision": 0.9309640796953544,
81
+ "eval_Weighted Recall": 0.9291896144767899,
82
+ "eval_accuracy": 0.9291896144767899,
83
+ "eval_loss": 0.16484124958515167,
84
+ "eval_runtime": 64.4008,
85
+ "eval_samples_per_second": 39.472,
86
+ "eval_steps_per_second": 1.242,
87
  "step": 77
88
  },
89
  {
90
  "epoch": 2.17,
91
  "learning_rate": 1.4705882352941177e-05,
92
+ "loss": 0.1989,
93
  "step": 84
94
  },
95
  {
96
  "epoch": 2.48,
97
  "learning_rate": 8.823529411764707e-06,
98
+ "loss": 0.2251,
99
  "step": 96
100
  },
101
  {
102
  "epoch": 2.79,
103
  "learning_rate": 2.9411764705882355e-06,
104
+ "loss": 0.2066,
105
  "step": 108
106
  },
107
  {
108
  "epoch": 2.94,
109
+ "eval_Macro F1": 0.9386216440513059,
110
+ "eval_Macro Precision": 0.9382250515595165,
111
+ "eval_Macro Recall": 0.9403801182311695,
112
+ "eval_Micro F1": 0.9425649095200629,
113
+ "eval_Micro Precision": 0.9425649095200629,
114
+ "eval_Micro Recall": 0.9425649095200629,
115
+ "eval_Weighted F1": 0.9426485199157074,
116
+ "eval_Weighted Precision": 0.9439679024820588,
117
+ "eval_Weighted Recall": 0.9425649095200629,
118
+ "eval_accuracy": 0.9425649095200629,
119
+ "eval_loss": 0.13585534691810608,
120
+ "eval_runtime": 63.8941,
121
+ "eval_samples_per_second": 39.785,
122
+ "eval_steps_per_second": 1.252,
123
  "step": 114
124
  },
125
  {
126
  "epoch": 2.94,
127
  "step": 114,
128
  "total_flos": 1.1265748270391624e+18,
129
+ "train_loss": 0.2613685036960401,
130
+ "train_runtime": 863.1868,
131
+ "train_samples_per_second": 17.141,
132
+ "train_steps_per_second": 0.132
133
  }
134
  ],
135
  "logging_steps": 12,