djbp committed on
Commit
1129da7
1 Parent(s): 22f2cb9

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8978102189781022
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.2350
36
- - Accuracy: 0.8978
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.9148418491484185
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.2418
36
+ - Accuracy: 0.9148
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 9.836065573770492,
3
- "eval_accuracy": 0.9172749391727494,
4
- "eval_loss": 0.20992133021354675,
5
- "eval_runtime": 46.8426,
6
- "eval_samples_per_second": 8.774,
7
- "eval_steps_per_second": 0.149,
8
- "total_flos": 1.9365344998357156e+18,
9
- "train_loss": 0.1960654123624166,
10
- "train_runtime": 8023.8475,
11
- "train_samples_per_second": 9.716,
12
  "train_steps_per_second": 0.037
13
  }
 
1
  {
2
  "epoch": 9.836065573770492,
3
+ "eval_accuracy": 0.9148418491484185,
4
+ "eval_loss": 0.2417779415845871,
5
+ "eval_runtime": 46.6034,
6
+ "eval_samples_per_second": 8.819,
7
+ "eval_steps_per_second": 0.15,
8
+ "total_flos": 1.9063087726729052e+18,
9
+ "train_loss": 0.3150376001993815,
10
+ "train_runtime": 8083.4568,
11
+ "train_samples_per_second": 9.644,
12
  "train_steps_per_second": 0.037
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.836065573770492,
3
- "eval_accuracy": 0.9172749391727494,
4
- "eval_loss": 0.20992133021354675,
5
- "eval_runtime": 46.8426,
6
- "eval_samples_per_second": 8.774,
7
- "eval_steps_per_second": 0.149
8
  }
 
1
  {
2
  "epoch": 9.836065573770492,
3
+ "eval_accuracy": 0.9148418491484185,
4
+ "eval_loss": 0.2417779415845871,
5
+ "eval_runtime": 46.6034,
6
+ "eval_samples_per_second": 8.819,
7
+ "eval_steps_per_second": 0.15
8
  }
runs/Jul08_10-50-37_1d4f294f4c83/events.out.tfevents.1720443978.1d4f294f4c83 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af3800800b1e49887d48b577e832b55fae138a27bb3365af3d24cddf8c4b01ed
3
+ size 140
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.836065573770492,
3
- "total_flos": 1.9365344998357156e+18,
4
- "train_loss": 0.1960654123624166,
5
- "train_runtime": 8023.8475,
6
- "train_samples_per_second": 9.716,
7
  "train_steps_per_second": 0.037
8
  }
 
1
  {
2
  "epoch": 9.836065573770492,
3
+ "total_flos": 1.9063087726729052e+18,
4
+ "train_loss": 0.3150376001993815,
5
+ "train_runtime": 8083.4568,
6
+ "train_samples_per_second": 9.644,
7
  "train_steps_per_second": 0.037
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9172749391727494,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-Mid-NonMidMarket-Classification/checkpoint-61",
4
  "epoch": 9.836065573770492,
5
  "eval_steps": 500,
6
  "global_step": 300,
@@ -10,311 +10,311 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.32786885245901637,
13
- "grad_norm": 2.2764179706573486,
14
  "learning_rate": 1.6666666666666667e-05,
15
- "loss": 0.2374,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.6557377049180327,
20
- "grad_norm": 3.2821102142333984,
21
  "learning_rate": 3.3333333333333335e-05,
22
- "loss": 0.251,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.9836065573770492,
27
- "grad_norm": 2.859771251678467,
28
  "learning_rate": 5e-05,
29
- "loss": 0.2558,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.9836065573770492,
34
- "eval_accuracy": 0.9124087591240876,
35
- "eval_loss": 0.2283840924501419,
36
- "eval_runtime": 46.3956,
37
- "eval_samples_per_second": 8.859,
38
- "eval_steps_per_second": 0.151,
39
  "step": 30
40
  },
41
  {
42
  "epoch": 1.3114754098360657,
43
- "grad_norm": 2.7073123455047607,
44
  "learning_rate": 4.814814814814815e-05,
45
- "loss": 0.2405,
46
  "step": 40
47
  },
48
  {
49
  "epoch": 1.639344262295082,
50
- "grad_norm": 2.072742462158203,
51
  "learning_rate": 4.62962962962963e-05,
52
- "loss": 0.2385,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.9672131147540983,
57
- "grad_norm": 3.4507081508636475,
58
  "learning_rate": 4.4444444444444447e-05,
59
- "loss": 0.2409,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 2.0,
64
- "eval_accuracy": 0.9172749391727494,
65
- "eval_loss": 0.20992133021354675,
66
- "eval_runtime": 46.6612,
67
- "eval_samples_per_second": 8.808,
68
- "eval_steps_per_second": 0.15,
69
  "step": 61
70
  },
71
  {
72
  "epoch": 2.2950819672131146,
73
- "grad_norm": 2.1165263652801514,
74
  "learning_rate": 4.259259259259259e-05,
75
- "loss": 0.2279,
76
  "step": 70
77
  },
78
  {
79
  "epoch": 2.6229508196721314,
80
- "grad_norm": 2.485424757003784,
81
  "learning_rate": 4.074074074074074e-05,
82
- "loss": 0.2246,
83
  "step": 80
84
  },
85
  {
86
  "epoch": 2.9508196721311473,
87
- "grad_norm": 3.2937915325164795,
88
  "learning_rate": 3.888888888888889e-05,
89
- "loss": 0.2151,
90
  "step": 90
91
  },
92
  {
93
  "epoch": 2.9836065573770494,
94
- "eval_accuracy": 0.9051094890510949,
95
- "eval_loss": 0.2273489087820053,
96
- "eval_runtime": 46.5349,
97
- "eval_samples_per_second": 8.832,
98
- "eval_steps_per_second": 0.15,
99
  "step": 91
100
  },
101
  {
102
  "epoch": 3.278688524590164,
103
- "grad_norm": 3.282068967819214,
104
  "learning_rate": 3.7037037037037037e-05,
105
- "loss": 0.2005,
106
  "step": 100
107
  },
108
  {
109
  "epoch": 3.6065573770491803,
110
- "grad_norm": 3.5282602310180664,
111
  "learning_rate": 3.518518518518519e-05,
112
- "loss": 0.2105,
113
  "step": 110
114
  },
115
  {
116
  "epoch": 3.9344262295081966,
117
- "grad_norm": 3.1445822715759277,
118
  "learning_rate": 3.3333333333333335e-05,
119
- "loss": 0.2085,
120
  "step": 120
121
  },
122
  {
123
  "epoch": 4.0,
124
- "eval_accuracy": 0.9002433090024331,
125
- "eval_loss": 0.2337980419397354,
126
- "eval_runtime": 46.9689,
127
- "eval_samples_per_second": 8.75,
128
  "eval_steps_per_second": 0.149,
129
  "step": 122
130
  },
131
  {
132
  "epoch": 4.262295081967213,
133
- "grad_norm": 2.4089198112487793,
134
  "learning_rate": 3.148148148148148e-05,
135
- "loss": 0.1954,
136
  "step": 130
137
  },
138
  {
139
  "epoch": 4.590163934426229,
140
- "grad_norm": 3.176950693130493,
141
  "learning_rate": 2.962962962962963e-05,
142
- "loss": 0.1936,
143
  "step": 140
144
  },
145
  {
146
  "epoch": 4.918032786885246,
147
- "grad_norm": 3.4379076957702637,
148
  "learning_rate": 2.777777777777778e-05,
149
- "loss": 0.1793,
150
  "step": 150
151
  },
152
  {
153
  "epoch": 4.983606557377049,
154
- "eval_accuracy": 0.9051094890510949,
155
- "eval_loss": 0.22886711359024048,
156
- "eval_runtime": 46.6942,
157
- "eval_samples_per_second": 8.802,
158
- "eval_steps_per_second": 0.15,
159
  "step": 152
160
  },
161
  {
162
  "epoch": 5.245901639344262,
163
- "grad_norm": 3.050398826599121,
164
  "learning_rate": 2.5925925925925925e-05,
165
- "loss": 0.206,
166
  "step": 160
167
  },
168
  {
169
  "epoch": 5.573770491803279,
170
- "grad_norm": 4.712157726287842,
171
  "learning_rate": 2.4074074074074074e-05,
172
- "loss": 0.1786,
173
  "step": 170
174
  },
175
  {
176
  "epoch": 5.901639344262295,
177
- "grad_norm": 2.9319636821746826,
178
  "learning_rate": 2.2222222222222223e-05,
179
- "loss": 0.1817,
180
  "step": 180
181
  },
182
  {
183
  "epoch": 6.0,
184
  "eval_accuracy": 0.9075425790754258,
185
- "eval_loss": 0.21736063063144684,
186
- "eval_runtime": 46.7917,
187
- "eval_samples_per_second": 8.784,
188
- "eval_steps_per_second": 0.15,
189
  "step": 183
190
  },
191
  {
192
  "epoch": 6.229508196721311,
193
- "grad_norm": 2.2905595302581787,
194
  "learning_rate": 2.037037037037037e-05,
195
- "loss": 0.1596,
196
  "step": 190
197
  },
198
  {
199
  "epoch": 6.557377049180328,
200
- "grad_norm": 2.4842424392700195,
201
  "learning_rate": 1.8518518518518518e-05,
202
- "loss": 0.18,
203
  "step": 200
204
  },
205
  {
206
  "epoch": 6.885245901639344,
207
- "grad_norm": 2.992955207824707,
208
  "learning_rate": 1.6666666666666667e-05,
209
- "loss": 0.1852,
210
  "step": 210
211
  },
212
  {
213
  "epoch": 6.983606557377049,
214
- "eval_accuracy": 0.9002433090024331,
215
- "eval_loss": 0.22296269237995148,
216
- "eval_runtime": 46.8007,
217
- "eval_samples_per_second": 8.782,
218
- "eval_steps_per_second": 0.15,
219
  "step": 213
220
  },
221
  {
222
  "epoch": 7.213114754098361,
223
- "grad_norm": 3.4494433403015137,
224
  "learning_rate": 1.4814814814814815e-05,
225
- "loss": 0.1667,
226
  "step": 220
227
  },
228
  {
229
  "epoch": 7.540983606557377,
230
- "grad_norm": 2.3728301525115967,
231
  "learning_rate": 1.2962962962962962e-05,
232
- "loss": 0.1626,
233
  "step": 230
234
  },
235
  {
236
  "epoch": 7.868852459016393,
237
- "grad_norm": 3.760058641433716,
238
  "learning_rate": 1.1111111111111112e-05,
239
- "loss": 0.1739,
240
  "step": 240
241
  },
242
  {
243
  "epoch": 8.0,
244
- "eval_accuracy": 0.9099756690997567,
245
- "eval_loss": 0.21712711453437805,
246
- "eval_runtime": 46.8608,
247
- "eval_samples_per_second": 8.771,
248
- "eval_steps_per_second": 0.149,
249
  "step": 244
250
  },
251
  {
252
  "epoch": 8.19672131147541,
253
- "grad_norm": 3.204749584197998,
254
  "learning_rate": 9.259259259259259e-06,
255
- "loss": 0.1717,
256
  "step": 250
257
  },
258
  {
259
  "epoch": 8.524590163934427,
260
- "grad_norm": 3.4265105724334717,
261
  "learning_rate": 7.4074074074074075e-06,
262
- "loss": 0.1585,
263
  "step": 260
264
  },
265
  {
266
  "epoch": 8.852459016393443,
267
- "grad_norm": 3.0664563179016113,
268
  "learning_rate": 5.555555555555556e-06,
269
- "loss": 0.1569,
270
  "step": 270
271
  },
272
  {
273
  "epoch": 8.98360655737705,
274
- "eval_accuracy": 0.9148418491484185,
275
- "eval_loss": 0.2113640010356903,
276
- "eval_runtime": 47.0024,
277
- "eval_samples_per_second": 8.744,
278
- "eval_steps_per_second": 0.149,
279
  "step": 274
280
  },
281
  {
282
  "epoch": 9.180327868852459,
283
- "grad_norm": 4.030396461486816,
284
  "learning_rate": 3.7037037037037037e-06,
285
- "loss": 0.1555,
286
  "step": 280
287
  },
288
  {
289
  "epoch": 9.508196721311476,
290
- "grad_norm": 2.520259380340576,
291
  "learning_rate": 1.8518518518518519e-06,
292
- "loss": 0.1667,
293
  "step": 290
294
  },
295
  {
296
  "epoch": 9.836065573770492,
297
- "grad_norm": 2.792572021484375,
298
  "learning_rate": 0.0,
299
- "loss": 0.1589,
300
  "step": 300
301
  },
302
  {
303
  "epoch": 9.836065573770492,
304
- "eval_accuracy": 0.9099756690997567,
305
- "eval_loss": 0.21540850400924683,
306
- "eval_runtime": 46.8053,
307
- "eval_samples_per_second": 8.781,
308
  "eval_steps_per_second": 0.15,
309
  "step": 300
310
  },
311
  {
312
  "epoch": 9.836065573770492,
313
  "step": 300,
314
- "total_flos": 1.9365344998357156e+18,
315
- "train_loss": 0.1960654123624166,
316
- "train_runtime": 8023.8475,
317
- "train_samples_per_second": 9.716,
318
  "train_steps_per_second": 0.037
319
  }
320
  ],
@@ -335,7 +335,7 @@
335
  "attributes": {}
336
  }
337
  },
338
- "total_flos": 1.9365344998357156e+18,
339
  "train_batch_size": 64,
340
  "trial_name": null,
341
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9148418491484185,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-Mid-NonMidMarket-Classification/checkpoint-213",
4
  "epoch": 9.836065573770492,
5
  "eval_steps": 500,
6
  "global_step": 300,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.32786885245901637,
13
+ "grad_norm": 4.995051860809326,
14
  "learning_rate": 1.6666666666666667e-05,
15
+ "loss": 1.0564,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.6557377049180327,
20
+ "grad_norm": 2.201974868774414,
21
  "learning_rate": 3.3333333333333335e-05,
22
+ "loss": 0.5859,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.9836065573770492,
27
+ "grad_norm": 2.7302303314208984,
28
  "learning_rate": 5e-05,
29
+ "loss": 0.4375,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.9836065573770492,
34
+ "eval_accuracy": 0.8564476885644768,
35
+ "eval_loss": 0.43852365016937256,
36
+ "eval_runtime": 47.0727,
37
+ "eval_samples_per_second": 8.731,
38
+ "eval_steps_per_second": 0.149,
39
  "step": 30
40
  },
41
  {
42
  "epoch": 1.3114754098360657,
43
+ "grad_norm": 1.9822132587432861,
44
  "learning_rate": 4.814814814814815e-05,
45
+ "loss": 0.3956,
46
  "step": 40
47
  },
48
  {
49
  "epoch": 1.639344262295082,
50
+ "grad_norm": 1.4209967851638794,
51
  "learning_rate": 4.62962962962963e-05,
52
+ "loss": 0.3696,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.9672131147540983,
57
+ "grad_norm": 2.8973567485809326,
58
  "learning_rate": 4.4444444444444447e-05,
59
+ "loss": 0.3408,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 2.0,
64
+ "eval_accuracy": 0.8978102189781022,
65
+ "eval_loss": 0.2872111201286316,
66
+ "eval_runtime": 46.8601,
67
+ "eval_samples_per_second": 8.771,
68
+ "eval_steps_per_second": 0.149,
69
  "step": 61
70
  },
71
  {
72
  "epoch": 2.2950819672131146,
73
+ "grad_norm": 2.134539842605591,
74
  "learning_rate": 4.259259259259259e-05,
75
+ "loss": 0.3242,
76
  "step": 70
77
  },
78
  {
79
  "epoch": 2.6229508196721314,
80
+ "grad_norm": 1.9525718688964844,
81
  "learning_rate": 4.074074074074074e-05,
82
+ "loss": 0.3175,
83
  "step": 80
84
  },
85
  {
86
  "epoch": 2.9508196721311473,
87
+ "grad_norm": 2.7107627391815186,
88
  "learning_rate": 3.888888888888889e-05,
89
+ "loss": 0.3106,
90
  "step": 90
91
  },
92
  {
93
  "epoch": 2.9836065573770494,
94
+ "eval_accuracy": 0.9099756690997567,
95
+ "eval_loss": 0.2597734332084656,
96
+ "eval_runtime": 47.0794,
97
+ "eval_samples_per_second": 8.73,
98
+ "eval_steps_per_second": 0.149,
99
  "step": 91
100
  },
101
  {
102
  "epoch": 3.278688524590164,
103
+ "grad_norm": 2.890233278274536,
104
  "learning_rate": 3.7037037037037037e-05,
105
+ "loss": 0.2894,
106
  "step": 100
107
  },
108
  {
109
  "epoch": 3.6065573770491803,
110
+ "grad_norm": 2.8648366928100586,
111
  "learning_rate": 3.518518518518519e-05,
112
+ "loss": 0.2802,
113
  "step": 110
114
  },
115
  {
116
  "epoch": 3.9344262295081966,
117
+ "grad_norm": 4.32825231552124,
118
  "learning_rate": 3.3333333333333335e-05,
119
+ "loss": 0.3167,
120
  "step": 120
121
  },
122
  {
123
  "epoch": 4.0,
124
+ "eval_accuracy": 0.9124087591240876,
125
+ "eval_loss": 0.26094919443130493,
126
+ "eval_runtime": 46.9524,
127
+ "eval_samples_per_second": 8.754,
128
  "eval_steps_per_second": 0.149,
129
  "step": 122
130
  },
131
  {
132
  "epoch": 4.262295081967213,
133
+ "grad_norm": 2.3360254764556885,
134
  "learning_rate": 3.148148148148148e-05,
135
+ "loss": 0.2947,
136
  "step": 130
137
  },
138
  {
139
  "epoch": 4.590163934426229,
140
+ "grad_norm": 2.176058292388916,
141
  "learning_rate": 2.962962962962963e-05,
142
+ "loss": 0.2699,
143
  "step": 140
144
  },
145
  {
146
  "epoch": 4.918032786885246,
147
+ "grad_norm": 2.304738998413086,
148
  "learning_rate": 2.777777777777778e-05,
149
+ "loss": 0.2533,
150
  "step": 150
151
  },
152
  {
153
  "epoch": 4.983606557377049,
154
+ "eval_accuracy": 0.9075425790754258,
155
+ "eval_loss": 0.2426266223192215,
156
+ "eval_runtime": 47.2479,
157
+ "eval_samples_per_second": 8.699,
158
+ "eval_steps_per_second": 0.148,
159
  "step": 152
160
  },
161
  {
162
  "epoch": 5.245901639344262,
163
+ "grad_norm": 3.3929357528686523,
164
  "learning_rate": 2.5925925925925925e-05,
165
+ "loss": 0.2827,
166
  "step": 160
167
  },
168
  {
169
  "epoch": 5.573770491803279,
170
+ "grad_norm": 2.577345609664917,
171
  "learning_rate": 2.4074074074074074e-05,
172
+ "loss": 0.2557,
173
  "step": 170
174
  },
175
  {
176
  "epoch": 5.901639344262295,
177
+ "grad_norm": 2.1669633388519287,
178
  "learning_rate": 2.2222222222222223e-05,
179
+ "loss": 0.256,
180
  "step": 180
181
  },
182
  {
183
  "epoch": 6.0,
184
  "eval_accuracy": 0.9075425790754258,
185
+ "eval_loss": 0.2371838092803955,
186
+ "eval_runtime": 47.0773,
187
+ "eval_samples_per_second": 8.73,
188
+ "eval_steps_per_second": 0.149,
189
  "step": 183
190
  },
191
  {
192
  "epoch": 6.229508196721311,
193
+ "grad_norm": 2.977111339569092,
194
  "learning_rate": 2.037037037037037e-05,
195
+ "loss": 0.2479,
196
  "step": 190
197
  },
198
  {
199
  "epoch": 6.557377049180328,
200
+ "grad_norm": 2.62300705909729,
201
  "learning_rate": 1.8518518518518518e-05,
202
+ "loss": 0.2591,
203
  "step": 200
204
  },
205
  {
206
  "epoch": 6.885245901639344,
207
+ "grad_norm": 2.1771183013916016,
208
  "learning_rate": 1.6666666666666667e-05,
209
+ "loss": 0.2492,
210
  "step": 210
211
  },
212
  {
213
  "epoch": 6.983606557377049,
214
+ "eval_accuracy": 0.9148418491484185,
215
+ "eval_loss": 0.2417779415845871,
216
+ "eval_runtime": 47.4002,
217
+ "eval_samples_per_second": 8.671,
218
+ "eval_steps_per_second": 0.148,
219
  "step": 213
220
  },
221
  {
222
  "epoch": 7.213114754098361,
223
+ "grad_norm": 2.1204617023468018,
224
  "learning_rate": 1.4814814814814815e-05,
225
+ "loss": 0.2281,
226
  "step": 220
227
  },
228
  {
229
  "epoch": 7.540983606557377,
230
+ "grad_norm": 2.766522169113159,
231
  "learning_rate": 1.2962962962962962e-05,
232
+ "loss": 0.238,
233
  "step": 230
234
  },
235
  {
236
  "epoch": 7.868852459016393,
237
+ "grad_norm": 2.7315852642059326,
238
  "learning_rate": 1.1111111111111112e-05,
239
+ "loss": 0.2364,
240
  "step": 240
241
  },
242
  {
243
  "epoch": 8.0,
244
+ "eval_accuracy": 0.9051094890510949,
245
+ "eval_loss": 0.23522743582725525,
246
+ "eval_runtime": 47.1934,
247
+ "eval_samples_per_second": 8.709,
248
+ "eval_steps_per_second": 0.148,
249
  "step": 244
250
  },
251
  {
252
  "epoch": 8.19672131147541,
253
+ "grad_norm": 2.156822681427002,
254
  "learning_rate": 9.259259259259259e-06,
255
+ "loss": 0.2334,
256
  "step": 250
257
  },
258
  {
259
  "epoch": 8.524590163934427,
260
+ "grad_norm": 3.404446840286255,
261
  "learning_rate": 7.4074074074074075e-06,
262
+ "loss": 0.2211,
263
  "step": 260
264
  },
265
  {
266
  "epoch": 8.852459016393443,
267
+ "grad_norm": 2.5259647369384766,
268
  "learning_rate": 5.555555555555556e-06,
269
+ "loss": 0.2301,
270
  "step": 270
271
  },
272
  {
273
  "epoch": 8.98360655737705,
274
+ "eval_accuracy": 0.9075425790754258,
275
+ "eval_loss": 0.23476606607437134,
276
+ "eval_runtime": 46.7257,
277
+ "eval_samples_per_second": 8.796,
278
+ "eval_steps_per_second": 0.15,
279
  "step": 274
280
  },
281
  {
282
  "epoch": 9.180327868852459,
283
+ "grad_norm": 3.3290984630584717,
284
  "learning_rate": 3.7037037037037037e-06,
285
+ "loss": 0.2253,
286
  "step": 280
287
  },
288
  {
289
  "epoch": 9.508196721311476,
290
+ "grad_norm": 1.9141846895217896,
291
  "learning_rate": 1.8518518518518519e-06,
292
+ "loss": 0.2201,
293
  "step": 290
294
  },
295
  {
296
  "epoch": 9.836065573770492,
297
+ "grad_norm": 2.769473075866699,
298
  "learning_rate": 0.0,
299
+ "loss": 0.2255,
300
  "step": 300
301
  },
302
  {
303
  "epoch": 9.836065573770492,
304
+ "eval_accuracy": 0.8978102189781022,
305
+ "eval_loss": 0.2350464165210724,
306
+ "eval_runtime": 46.6731,
307
+ "eval_samples_per_second": 8.806,
308
  "eval_steps_per_second": 0.15,
309
  "step": 300
310
  },
311
  {
312
  "epoch": 9.836065573770492,
313
  "step": 300,
314
+ "total_flos": 1.9063087726729052e+18,
315
+ "train_loss": 0.3150376001993815,
316
+ "train_runtime": 8083.4568,
317
+ "train_samples_per_second": 9.644,
318
  "train_steps_per_second": 0.037
319
  }
320
  ],
 
335
  "attributes": {}
336
  }
337
  },
338
+ "total_flos": 1.9063087726729052e+18,
339
  "train_batch_size": 64,
340
  "trial_name": null,
341
  "trial_params": null