SonishMaharjan committed on
Commit
1dcea62
1 Parent(s): b21a994

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. train_results.json +6 -6
  3. trainer_state.json +155 -95
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.94,
3
- "total_flos": 1.1265748270391624e+18,
4
- "train_loss": 0.16734615334293299,
5
- "train_runtime": 848.5959,
6
- "train_samples_per_second": 17.436,
7
- "train_steps_per_second": 0.134
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "total_flos": 2.320189226545914e+18,
4
+ "train_loss": 0.18204877430047745,
5
+ "train_runtime": 4369.7043,
6
+ "train_samples_per_second": 6.851,
7
+ "train_steps_per_second": 0.054
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.94,
3
- "total_flos": 1.1265748270391624e+18,
4
- "train_loss": 0.16734615334293299,
5
- "train_runtime": 848.5959,
6
- "train_samples_per_second": 17.436,
7
- "train_steps_per_second": 0.134
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "total_flos": 2.320189226545914e+18,
4
+ "train_loss": 0.18204877430047745,
5
+ "train_runtime": 4369.7043,
6
+ "train_samples_per_second": 6.851,
7
+ "train_steps_per_second": 0.054
8
  }
trainer_state.json CHANGED
@@ -1,142 +1,202 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9419354838709677,
5
  "eval_steps": 500,
6
- "global_step": 114,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.03,
13
- "learning_rate": 4.166666666666667e-06,
14
- "loss": 0.1438,
15
  "step": 1
16
  },
17
  {
18
- "epoch": 0.31,
19
- "learning_rate": 5e-05,
20
- "loss": 0.146,
21
  "step": 12
22
  },
23
  {
24
- "epoch": 0.62,
25
- "learning_rate": 4.411764705882353e-05,
26
- "loss": 0.1822,
27
  "step": 24
28
  },
29
  {
30
- "epoch": 0.93,
31
- "learning_rate": 3.8235294117647055e-05,
32
- "loss": 0.1916,
33
  "step": 36
34
  },
35
  {
36
- "epoch": 0.98,
37
- "eval_Macro F1": 0.9408451121862865,
38
- "eval_Macro Precision": 0.9411664777049882,
39
- "eval_Macro Recall": 0.9427118741264285,
40
- "eval_Micro F1": 0.9461054287962234,
41
- "eval_Micro Precision": 0.9461054287962234,
42
- "eval_Micro Recall": 0.9461054287962234,
43
- "eval_Weighted F1": 0.9464960091472664,
44
- "eval_Weighted Precision": 0.9487006349374144,
45
- "eval_Weighted Recall": 0.9461054287962234,
46
- "eval_accuracy": 0.9461054287962234,
47
- "eval_loss": 0.13961628079414368,
48
- "eval_runtime": 63.2832,
49
- "eval_samples_per_second": 40.169,
50
- "eval_steps_per_second": 1.264,
51
- "step": 38
52
- },
53
- {
54
- "epoch": 1.24,
55
- "learning_rate": 3.235294117647059e-05,
56
- "loss": 0.1565,
57
  "step": 48
58
  },
59
  {
60
- "epoch": 1.55,
61
- "learning_rate": 2.647058823529412e-05,
62
- "loss": 0.1692,
63
  "step": 60
64
  },
65
  {
66
- "epoch": 1.86,
67
- "learning_rate": 2.058823529411765e-05,
68
- "loss": 0.1597,
69
  "step": 72
70
  },
71
  {
72
- "epoch": 1.99,
73
- "eval_Macro F1": 0.9484522652918805,
74
- "eval_Macro Precision": 0.9471910825399321,
75
- "eval_Macro Recall": 0.95148334257645,
76
- "eval_Micro F1": 0.9520062942564909,
77
- "eval_Micro Precision": 0.952006294256491,
78
- "eval_Micro Recall": 0.952006294256491,
79
- "eval_Weighted F1": 0.9522977862410118,
80
- "eval_Weighted Precision": 0.9540997275393984,
81
- "eval_Weighted Recall": 0.952006294256491,
82
- "eval_accuracy": 0.952006294256491,
83
- "eval_loss": 0.12265925854444504,
84
- "eval_runtime": 64.9371,
85
- "eval_samples_per_second": 39.146,
86
- "eval_steps_per_second": 1.232,
87
- "step": 77
88
- },
89
- {
90
- "epoch": 2.17,
91
- "learning_rate": 1.4705882352941177e-05,
92
- "loss": 0.1619,
93
  "step": 84
94
  },
95
  {
96
- "epoch": 2.48,
97
- "learning_rate": 8.823529411764707e-06,
98
- "loss": 0.1611,
99
  "step": 96
100
  },
101
  {
102
- "epoch": 2.79,
103
- "learning_rate": 2.9411764705882355e-06,
104
- "loss": 0.1722,
105
  "step": 108
106
  },
107
  {
108
- "epoch": 2.94,
109
- "eval_Macro F1": 0.947301058449996,
110
- "eval_Macro Precision": 0.9458287584858323,
111
- "eval_Macro Recall": 0.9498167521420487,
112
- "eval_Micro F1": 0.9512195121951219,
113
- "eval_Micro Precision": 0.9512195121951219,
114
- "eval_Micro Recall": 0.9512195121951219,
115
- "eval_Weighted F1": 0.9515018097434871,
116
- "eval_Weighted Precision": 0.9527033488356239,
117
- "eval_Weighted Recall": 0.9512195121951219,
118
- "eval_accuracy": 0.9512195121951219,
119
- "eval_loss": 0.11822786182165146,
120
- "eval_runtime": 63.9617,
121
- "eval_samples_per_second": 39.743,
122
- "eval_steps_per_second": 1.251,
123
- "step": 114
124
- },
125
- {
126
- "epoch": 2.94,
127
- "step": 114,
128
- "total_flos": 1.1265748270391624e+18,
129
- "train_loss": 0.16734615334293299,
130
- "train_runtime": 848.5959,
131
- "train_samples_per_second": 17.436,
132
- "train_steps_per_second": 0.134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  }
134
  ],
135
  "logging_steps": 12,
136
- "max_steps": 114,
137
  "num_train_epochs": 3,
138
  "save_steps": 500,
139
- "total_flos": 1.1265748270391624e+18,
140
  "trial_name": null,
141
  "trial_params": null
142
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 234,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01,
13
+ "learning_rate": 2.0833333333333334e-06,
14
+ "loss": 0.1764,
15
  "step": 1
16
  },
17
  {
18
+ "epoch": 0.15,
19
+ "learning_rate": 2.5e-05,
20
+ "loss": 0.2288,
21
  "step": 12
22
  },
23
  {
24
+ "epoch": 0.31,
25
+ "learning_rate": 5e-05,
26
+ "loss": 0.2324,
27
  "step": 24
28
  },
29
  {
30
+ "epoch": 0.46,
31
+ "learning_rate": 4.714285714285714e-05,
32
+ "loss": 0.2094,
33
  "step": 36
34
  },
35
  {
36
+ "epoch": 0.62,
37
+ "learning_rate": 4.428571428571428e-05,
38
+ "loss": 0.2219,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  "step": 48
40
  },
41
  {
42
+ "epoch": 0.77,
43
+ "learning_rate": 4.1428571428571437e-05,
44
+ "loss": 0.2048,
45
  "step": 60
46
  },
47
  {
48
+ "epoch": 0.92,
49
+ "learning_rate": 3.857142857142858e-05,
50
+ "loss": 0.2337,
51
  "step": 72
52
  },
53
  {
54
+ "epoch": 1.0,
55
+ "eval_Macro F1": 0.9058241520350262,
56
+ "eval_Macro Precision": 0.9220297823287216,
57
+ "eval_Macro Recall": 0.9040021601099654,
58
+ "eval_Micro F1": 0.9087221095334685,
59
+ "eval_Micro Precision": 0.9087221095334685,
60
+ "eval_Micro Recall": 0.9087221095334685,
61
+ "eval_Weighted F1": 0.9097572645217704,
62
+ "eval_Weighted Precision": 0.9228816407309685,
63
+ "eval_Weighted Recall": 0.9087221095334685,
64
+ "eval_accuracy": 0.9087221095334685,
65
+ "eval_loss": 0.26681017875671387,
66
+ "eval_runtime": 509.7519,
67
+ "eval_samples_per_second": 3.869,
68
+ "eval_steps_per_second": 0.122,
69
+ "step": 78
70
+ },
71
+ {
72
+ "epoch": 1.08,
73
+ "learning_rate": 3.571428571428572e-05,
74
+ "loss": 0.1943,
75
  "step": 84
76
  },
77
  {
78
+ "epoch": 1.23,
79
+ "learning_rate": 3.285714285714286e-05,
80
+ "loss": 0.2022,
81
  "step": 96
82
  },
83
  {
84
+ "epoch": 1.38,
85
+ "learning_rate": 3e-05,
86
+ "loss": 0.1662,
87
  "step": 108
88
  },
89
  {
90
+ "epoch": 1.54,
91
+ "learning_rate": 2.714285714285714e-05,
92
+ "loss": 0.1906,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 1.69,
97
+ "learning_rate": 2.4285714285714288e-05,
98
+ "loss": 0.1614,
99
+ "step": 132
100
+ },
101
+ {
102
+ "epoch": 1.85,
103
+ "learning_rate": 2.1428571428571428e-05,
104
+ "loss": 0.16,
105
+ "step": 144
106
+ },
107
+ {
108
+ "epoch": 2.0,
109
+ "learning_rate": 1.8571428571428572e-05,
110
+ "loss": 0.1711,
111
+ "step": 156
112
+ },
113
+ {
114
+ "epoch": 2.0,
115
+ "eval_Macro F1": 0.9330778801188252,
116
+ "eval_Macro Precision": 0.9291584729568021,
117
+ "eval_Macro Recall": 0.9402683529435533,
118
+ "eval_Micro F1": 0.9376267748478702,
119
+ "eval_Micro Precision": 0.9376267748478702,
120
+ "eval_Micro Recall": 0.9376267748478702,
121
+ "eval_Weighted F1": 0.9380245851334839,
122
+ "eval_Weighted Precision": 0.9415569462618286,
123
+ "eval_Weighted Recall": 0.9376267748478702,
124
+ "eval_accuracy": 0.9376267748478702,
125
+ "eval_loss": 0.1819760948419571,
126
+ "eval_runtime": 52.791,
127
+ "eval_samples_per_second": 37.355,
128
+ "eval_steps_per_second": 1.174,
129
+ "step": 156
130
+ },
131
+ {
132
+ "epoch": 2.15,
133
+ "learning_rate": 1.5714285714285715e-05,
134
+ "loss": 0.1743,
135
+ "step": 168
136
+ },
137
+ {
138
+ "epoch": 2.31,
139
+ "learning_rate": 1.2857142857142857e-05,
140
+ "loss": 0.1607,
141
+ "step": 180
142
+ },
143
+ {
144
+ "epoch": 2.46,
145
+ "learning_rate": 1e-05,
146
+ "loss": 0.1675,
147
+ "step": 192
148
+ },
149
+ {
150
+ "epoch": 2.62,
151
+ "learning_rate": 7.142857142857143e-06,
152
+ "loss": 0.1393,
153
+ "step": 204
154
+ },
155
+ {
156
+ "epoch": 2.77,
157
+ "learning_rate": 4.285714285714286e-06,
158
+ "loss": 0.1308,
159
+ "step": 216
160
+ },
161
+ {
162
+ "epoch": 2.92,
163
+ "learning_rate": 1.4285714285714286e-06,
164
+ "loss": 0.1297,
165
+ "step": 228
166
+ },
167
+ {
168
+ "epoch": 3.0,
169
+ "eval_Macro F1": 0.9504520666030198,
170
+ "eval_Macro Precision": 0.9506141996378501,
171
+ "eval_Macro Recall": 0.9523141162334021,
172
+ "eval_Micro F1": 0.9523326572008114,
173
+ "eval_Micro Precision": 0.9523326572008114,
174
+ "eval_Micro Recall": 0.9523326572008114,
175
+ "eval_Weighted F1": 0.9523782212526937,
176
+ "eval_Weighted Precision": 0.9543848061355097,
177
+ "eval_Weighted Recall": 0.9523326572008114,
178
+ "eval_accuracy": 0.9523326572008114,
179
+ "eval_loss": 0.1481797993183136,
180
+ "eval_runtime": 51.55,
181
+ "eval_samples_per_second": 38.254,
182
+ "eval_steps_per_second": 1.203,
183
+ "step": 234
184
+ },
185
+ {
186
+ "epoch": 3.0,
187
+ "step": 234,
188
+ "total_flos": 2.320189226545914e+18,
189
+ "train_loss": 0.18204877430047745,
190
+ "train_runtime": 4369.7043,
191
+ "train_samples_per_second": 6.851,
192
+ "train_steps_per_second": 0.054
193
  }
194
  ],
195
  "logging_steps": 12,
196
+ "max_steps": 234,
197
  "num_train_epochs": 3,
198
  "save_steps": 500,
199
+ "total_flos": 2.320189226545914e+18,
200
  "trial_name": null,
201
  "trial_params": null
202
  }