Augusto777 commited on
Commit
a2bf0fd
1 Parent(s): d06165f

End of training

Browse files
README.md CHANGED
@@ -16,8 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.6987
20
- - Accuracy: 0.7431
21
 
22
  ## Model description
23
 
 
16
 
17
  This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.6964
20
+ - Accuracy: 0.7615
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
- "epoch": 2.71,
3
- "total_flos": 7291673091440640.0,
4
- "train_loss": 1.383760514713469,
5
- "train_runtime": 25.3309,
6
- "train_samples_per_second": 115.827,
7
- "train_steps_per_second": 0.829
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 36.13,
3
+ "eval_accuracy": 0.7614678899082569,
4
+ "eval_loss": 0.6964432597160339,
5
+ "eval_runtime": 0.6749,
6
+ "eval_samples_per_second": 161.515,
7
+ "eval_steps_per_second": 5.927,
8
+ "total_flos": 9.686412043576934e+16,
9
+ "train_loss": 0.9347471083913531,
10
+ "train_runtime": 250.0799,
11
+ "train_samples_per_second": 156.43,
12
+ "train_steps_per_second": 1.12
13
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 36.13,
3
+ "eval_accuracy": 0.7614678899082569,
4
+ "eval_loss": 0.6964432597160339,
5
+ "eval_runtime": 0.6749,
6
+ "eval_samples_per_second": 161.515,
7
+ "eval_steps_per_second": 5.927
8
+ }
runs/Jan28_17-01-40_4890d88f69da/events.out.tfevents.1706461573.4890d88f69da.2289.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6f57b3c27d5d20a4643adca24ed21ddd5462a4ab9decc31f614537c5c7d0021
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.71,
3
- "total_flos": 7291673091440640.0,
4
- "train_loss": 1.383760514713469,
5
- "train_runtime": 25.3309,
6
- "train_samples_per_second": 115.827,
7
- "train_steps_per_second": 0.829
8
  }
 
1
  {
2
+ "epoch": 36.13,
3
+ "total_flos": 9.686412043576934e+16,
4
+ "train_loss": 0.9347471083913531,
5
+ "train_runtime": 250.0799,
6
+ "train_samples_per_second": 156.43,
7
+ "train_steps_per_second": 1.12
8
  }
trainer_state.json CHANGED
@@ -1,67 +1,529 @@
1
  {
2
- "best_metric": 0.3486238532110092,
3
- "best_model_checkpoint": "swiftformer-xs-dmae-va-U-SF/checkpoint-21",
4
- "epoch": 2.709677419354839,
5
  "eval_steps": 500,
6
- "global_step": 21,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.9,
13
- "eval_accuracy": 0.24770642201834864,
14
- "eval_loss": 1.4376270771026611,
15
- "eval_runtime": 0.5029,
16
- "eval_samples_per_second": 216.756,
17
- "eval_steps_per_second": 7.954,
18
  "step": 7
19
  },
20
  {
21
  "epoch": 1.29,
22
- "learning_rate": 3.055555555555556e-05,
23
- "loss": 1.4126,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 1.94,
28
- "eval_accuracy": 0.3211009174311927,
29
- "eval_loss": 1.3581504821777344,
30
- "eval_runtime": 0.4287,
31
- "eval_samples_per_second": 254.235,
32
- "eval_steps_per_second": 9.33,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 2.58,
37
- "learning_rate": 2.777777777777778e-06,
38
- "loss": 1.359,
39
  "step": 20
40
  },
41
  {
42
- "epoch": 2.71,
43
- "eval_accuracy": 0.3486238532110092,
44
- "eval_loss": 1.3463064432144165,
45
- "eval_runtime": 0.4247,
46
- "eval_samples_per_second": 256.633,
47
- "eval_steps_per_second": 9.418,
48
- "step": 21
49
  },
50
  {
51
- "epoch": 2.71,
52
- "step": 21,
53
- "total_flos": 7291673091440640.0,
54
- "train_loss": 1.383760514713469,
55
- "train_runtime": 25.3309,
56
- "train_samples_per_second": 115.827,
57
- "train_steps_per_second": 0.829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
  ],
60
  "logging_steps": 10,
61
- "max_steps": 21,
62
- "num_train_epochs": 3,
63
  "save_steps": 500,
64
- "total_flos": 7291673091440640.0,
65
  "trial_name": null,
66
  "trial_params": null
67
  }
 
1
  {
2
+ "best_metric": 0.7614678899082569,
3
+ "best_model_checkpoint": "swiftformer-xs-dmae-va-U-SF/checkpoint-271",
4
+ "epoch": 36.12903225806452,
5
  "eval_steps": 500,
6
+ "global_step": 280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.9,
13
+ "eval_accuracy": 0.3119266055045872,
14
+ "eval_loss": 1.3887187242507935,
15
+ "eval_runtime": 0.6715,
16
+ "eval_samples_per_second": 162.322,
17
+ "eval_steps_per_second": 5.957,
18
  "step": 7
19
  },
20
  {
21
  "epoch": 1.29,
22
+ "learning_rate": 1.785714285714286e-05,
23
+ "loss": 1.4383,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 1.94,
28
+ "eval_accuracy": 0.41284403669724773,
29
+ "eval_loss": 1.344041347503662,
30
+ "eval_runtime": 0.4343,
31
+ "eval_samples_per_second": 250.972,
32
+ "eval_steps_per_second": 9.21,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 2.58,
37
+ "learning_rate": 3.571428571428572e-05,
38
+ "loss": 1.3956,
39
  "step": 20
40
  },
41
  {
42
+ "epoch": 2.97,
43
+ "eval_accuracy": 0.3761467889908257,
44
+ "eval_loss": 1.3158775568008423,
45
+ "eval_runtime": 0.5875,
46
+ "eval_samples_per_second": 185.531,
47
+ "eval_steps_per_second": 6.808,
48
+ "step": 23
49
  },
50
  {
51
+ "epoch": 3.87,
52
+ "learning_rate": 4.960317460317461e-05,
53
+ "loss": 1.36,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 4.0,
58
+ "eval_accuracy": 0.3853211009174312,
59
+ "eval_loss": 1.2906575202941895,
60
+ "eval_runtime": 0.416,
61
+ "eval_samples_per_second": 262.05,
62
+ "eval_steps_per_second": 9.617,
63
+ "step": 31
64
+ },
65
+ {
66
+ "epoch": 4.9,
67
+ "eval_accuracy": 0.44036697247706424,
68
+ "eval_loss": 1.2488025426864624,
69
+ "eval_runtime": 0.4344,
70
+ "eval_samples_per_second": 250.911,
71
+ "eval_steps_per_second": 9.208,
72
+ "step": 38
73
+ },
74
+ {
75
+ "epoch": 5.16,
76
+ "learning_rate": 4.761904761904762e-05,
77
+ "loss": 1.2912,
78
+ "step": 40
79
+ },
80
+ {
81
+ "epoch": 5.94,
82
+ "eval_accuracy": 0.4036697247706422,
83
+ "eval_loss": 1.2129390239715576,
84
+ "eval_runtime": 0.4322,
85
+ "eval_samples_per_second": 252.176,
86
+ "eval_steps_per_second": 9.254,
87
+ "step": 46
88
+ },
89
+ {
90
+ "epoch": 6.45,
91
+ "learning_rate": 4.563492063492064e-05,
92
+ "loss": 1.2387,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 6.97,
97
+ "eval_accuracy": 0.46788990825688076,
98
+ "eval_loss": 1.1733514070510864,
99
+ "eval_runtime": 0.4452,
100
+ "eval_samples_per_second": 244.821,
101
+ "eval_steps_per_second": 8.984,
102
+ "step": 54
103
+ },
104
+ {
105
+ "epoch": 7.74,
106
+ "learning_rate": 4.3650793650793655e-05,
107
+ "loss": 1.1607,
108
+ "step": 60
109
+ },
110
+ {
111
+ "epoch": 8.0,
112
+ "eval_accuracy": 0.5137614678899083,
113
+ "eval_loss": 1.143617868423462,
114
+ "eval_runtime": 0.4224,
115
+ "eval_samples_per_second": 258.05,
116
+ "eval_steps_per_second": 9.47,
117
+ "step": 62
118
+ },
119
+ {
120
+ "epoch": 8.9,
121
+ "eval_accuracy": 0.4954128440366973,
122
+ "eval_loss": 1.0990597009658813,
123
+ "eval_runtime": 0.459,
124
+ "eval_samples_per_second": 237.483,
125
+ "eval_steps_per_second": 8.715,
126
+ "step": 69
127
+ },
128
+ {
129
+ "epoch": 9.03,
130
+ "learning_rate": 4.166666666666667e-05,
131
+ "loss": 1.1224,
132
+ "step": 70
133
+ },
134
+ {
135
+ "epoch": 9.94,
136
+ "eval_accuracy": 0.5504587155963303,
137
+ "eval_loss": 1.0478596687316895,
138
+ "eval_runtime": 0.7035,
139
+ "eval_samples_per_second": 154.95,
140
+ "eval_steps_per_second": 5.686,
141
+ "step": 77
142
+ },
143
+ {
144
+ "epoch": 10.32,
145
+ "learning_rate": 3.968253968253968e-05,
146
+ "loss": 1.0547,
147
+ "step": 80
148
+ },
149
+ {
150
+ "epoch": 10.97,
151
+ "eval_accuracy": 0.5963302752293578,
152
+ "eval_loss": 0.9993337392807007,
153
+ "eval_runtime": 0.4348,
154
+ "eval_samples_per_second": 250.685,
155
+ "eval_steps_per_second": 9.199,
156
+ "step": 85
157
+ },
158
+ {
159
+ "epoch": 11.61,
160
+ "learning_rate": 3.76984126984127e-05,
161
+ "loss": 1.0137,
162
+ "step": 90
163
+ },
164
+ {
165
+ "epoch": 12.0,
166
+ "eval_accuracy": 0.6146788990825688,
167
+ "eval_loss": 0.9859614372253418,
168
+ "eval_runtime": 0.5726,
169
+ "eval_samples_per_second": 190.361,
170
+ "eval_steps_per_second": 6.986,
171
+ "step": 93
172
+ },
173
+ {
174
+ "epoch": 12.9,
175
+ "learning_rate": 3.571428571428572e-05,
176
+ "loss": 0.9652,
177
+ "step": 100
178
+ },
179
+ {
180
+ "epoch": 12.9,
181
+ "eval_accuracy": 0.6146788990825688,
182
+ "eval_loss": 0.9697992205619812,
183
+ "eval_runtime": 0.4313,
184
+ "eval_samples_per_second": 252.708,
185
+ "eval_steps_per_second": 9.274,
186
+ "step": 100
187
+ },
188
+ {
189
+ "epoch": 13.94,
190
+ "eval_accuracy": 0.6055045871559633,
191
+ "eval_loss": 0.9519100785255432,
192
+ "eval_runtime": 0.4407,
193
+ "eval_samples_per_second": 247.344,
194
+ "eval_steps_per_second": 9.077,
195
+ "step": 108
196
+ },
197
+ {
198
+ "epoch": 14.19,
199
+ "learning_rate": 3.3730158730158734e-05,
200
+ "loss": 0.9217,
201
+ "step": 110
202
+ },
203
+ {
204
+ "epoch": 14.97,
205
+ "eval_accuracy": 0.6055045871559633,
206
+ "eval_loss": 0.9242026209831238,
207
+ "eval_runtime": 0.4297,
208
+ "eval_samples_per_second": 253.648,
209
+ "eval_steps_per_second": 9.308,
210
+ "step": 116
211
+ },
212
+ {
213
+ "epoch": 15.48,
214
+ "learning_rate": 3.1746031746031745e-05,
215
+ "loss": 0.9122,
216
+ "step": 120
217
+ },
218
+ {
219
+ "epoch": 16.0,
220
+ "eval_accuracy": 0.6146788990825688,
221
+ "eval_loss": 0.9062366485595703,
222
+ "eval_runtime": 0.4329,
223
+ "eval_samples_per_second": 251.781,
224
+ "eval_steps_per_second": 9.24,
225
+ "step": 124
226
+ },
227
+ {
228
+ "epoch": 16.77,
229
+ "learning_rate": 2.9761904761904762e-05,
230
+ "loss": 0.8763,
231
+ "step": 130
232
+ },
233
+ {
234
+ "epoch": 16.9,
235
+ "eval_accuracy": 0.6422018348623854,
236
+ "eval_loss": 0.8872672915458679,
237
+ "eval_runtime": 0.4224,
238
+ "eval_samples_per_second": 258.055,
239
+ "eval_steps_per_second": 9.47,
240
+ "step": 131
241
+ },
242
+ {
243
+ "epoch": 17.94,
244
+ "eval_accuracy": 0.6513761467889908,
245
+ "eval_loss": 0.8477428555488586,
246
+ "eval_runtime": 0.5523,
247
+ "eval_samples_per_second": 197.365,
248
+ "eval_steps_per_second": 7.243,
249
+ "step": 139
250
+ },
251
+ {
252
+ "epoch": 18.06,
253
+ "learning_rate": 2.777777777777778e-05,
254
+ "loss": 0.8471,
255
+ "step": 140
256
+ },
257
+ {
258
+ "epoch": 18.97,
259
+ "eval_accuracy": 0.6513761467889908,
260
+ "eval_loss": 0.8427405953407288,
261
+ "eval_runtime": 0.6098,
262
+ "eval_samples_per_second": 178.737,
263
+ "eval_steps_per_second": 6.559,
264
+ "step": 147
265
+ },
266
+ {
267
+ "epoch": 19.35,
268
+ "learning_rate": 2.5793650793650796e-05,
269
+ "loss": 0.8331,
270
+ "step": 150
271
+ },
272
+ {
273
+ "epoch": 20.0,
274
+ "eval_accuracy": 0.6880733944954128,
275
+ "eval_loss": 0.8257479667663574,
276
+ "eval_runtime": 0.438,
277
+ "eval_samples_per_second": 248.852,
278
+ "eval_steps_per_second": 9.132,
279
+ "step": 155
280
+ },
281
+ {
282
+ "epoch": 20.65,
283
+ "learning_rate": 2.380952380952381e-05,
284
+ "loss": 0.8167,
285
+ "step": 160
286
+ },
287
+ {
288
+ "epoch": 20.9,
289
+ "eval_accuracy": 0.6880733944954128,
290
+ "eval_loss": 0.8025383353233337,
291
+ "eval_runtime": 0.4399,
292
+ "eval_samples_per_second": 247.761,
293
+ "eval_steps_per_second": 9.092,
294
+ "step": 162
295
+ },
296
+ {
297
+ "epoch": 21.94,
298
+ "learning_rate": 2.1825396825396827e-05,
299
+ "loss": 0.8022,
300
+ "step": 170
301
+ },
302
+ {
303
+ "epoch": 21.94,
304
+ "eval_accuracy": 0.6972477064220184,
305
+ "eval_loss": 0.8010965585708618,
306
+ "eval_runtime": 0.4311,
307
+ "eval_samples_per_second": 252.864,
308
+ "eval_steps_per_second": 9.279,
309
+ "step": 170
310
+ },
311
+ {
312
+ "epoch": 22.97,
313
+ "eval_accuracy": 0.6972477064220184,
314
+ "eval_loss": 0.8078291416168213,
315
+ "eval_runtime": 0.4412,
316
+ "eval_samples_per_second": 247.041,
317
+ "eval_steps_per_second": 9.066,
318
+ "step": 178
319
+ },
320
+ {
321
+ "epoch": 23.23,
322
+ "learning_rate": 1.984126984126984e-05,
323
+ "loss": 0.7996,
324
+ "step": 180
325
+ },
326
+ {
327
+ "epoch": 24.0,
328
+ "eval_accuracy": 0.7064220183486238,
329
+ "eval_loss": 0.7920359969139099,
330
+ "eval_runtime": 0.4625,
331
+ "eval_samples_per_second": 235.682,
332
+ "eval_steps_per_second": 8.649,
333
+ "step": 186
334
+ },
335
+ {
336
+ "epoch": 24.52,
337
+ "learning_rate": 1.785714285714286e-05,
338
+ "loss": 0.7962,
339
+ "step": 190
340
+ },
341
+ {
342
+ "epoch": 24.9,
343
+ "eval_accuracy": 0.7247706422018348,
344
+ "eval_loss": 0.760365903377533,
345
+ "eval_runtime": 0.4325,
346
+ "eval_samples_per_second": 251.996,
347
+ "eval_steps_per_second": 9.248,
348
+ "step": 193
349
+ },
350
+ {
351
+ "epoch": 25.81,
352
+ "learning_rate": 1.5873015873015872e-05,
353
+ "loss": 0.7268,
354
+ "step": 200
355
+ },
356
+ {
357
+ "epoch": 25.94,
358
+ "eval_accuracy": 0.6972477064220184,
359
+ "eval_loss": 0.7596898674964905,
360
+ "eval_runtime": 0.568,
361
+ "eval_samples_per_second": 191.903,
362
+ "eval_steps_per_second": 7.042,
363
+ "step": 201
364
+ },
365
+ {
366
+ "epoch": 26.97,
367
+ "eval_accuracy": 0.6972477064220184,
368
+ "eval_loss": 0.7461942434310913,
369
+ "eval_runtime": 0.4265,
370
+ "eval_samples_per_second": 255.55,
371
+ "eval_steps_per_second": 9.378,
372
+ "step": 209
373
+ },
374
+ {
375
+ "epoch": 27.1,
376
+ "learning_rate": 1.388888888888889e-05,
377
+ "loss": 0.7477,
378
+ "step": 210
379
+ },
380
+ {
381
+ "epoch": 28.0,
382
+ "eval_accuracy": 0.7064220183486238,
383
+ "eval_loss": 0.7316110730171204,
384
+ "eval_runtime": 0.6404,
385
+ "eval_samples_per_second": 170.198,
386
+ "eval_steps_per_second": 6.246,
387
+ "step": 217
388
+ },
389
+ {
390
+ "epoch": 28.39,
391
+ "learning_rate": 1.1904761904761905e-05,
392
+ "loss": 0.7411,
393
+ "step": 220
394
+ },
395
+ {
396
+ "epoch": 28.9,
397
+ "eval_accuracy": 0.7522935779816514,
398
+ "eval_loss": 0.7275413870811462,
399
+ "eval_runtime": 0.4398,
400
+ "eval_samples_per_second": 247.819,
401
+ "eval_steps_per_second": 9.094,
402
+ "step": 224
403
+ },
404
+ {
405
+ "epoch": 29.68,
406
+ "learning_rate": 9.92063492063492e-06,
407
+ "loss": 0.7415,
408
+ "step": 230
409
+ },
410
+ {
411
+ "epoch": 29.94,
412
+ "eval_accuracy": 0.7247706422018348,
413
+ "eval_loss": 0.7210223078727722,
414
+ "eval_runtime": 0.4316,
415
+ "eval_samples_per_second": 252.526,
416
+ "eval_steps_per_second": 9.267,
417
+ "step": 232
418
+ },
419
+ {
420
+ "epoch": 30.97,
421
+ "learning_rate": 7.936507936507936e-06,
422
+ "loss": 0.7159,
423
+ "step": 240
424
+ },
425
+ {
426
+ "epoch": 30.97,
427
+ "eval_accuracy": 0.7247706422018348,
428
+ "eval_loss": 0.727079451084137,
429
+ "eval_runtime": 0.4278,
430
+ "eval_samples_per_second": 254.812,
431
+ "eval_steps_per_second": 9.351,
432
+ "step": 240
433
+ },
434
+ {
435
+ "epoch": 32.0,
436
+ "eval_accuracy": 0.7431192660550459,
437
+ "eval_loss": 0.700477659702301,
438
+ "eval_runtime": 0.4579,
439
+ "eval_samples_per_second": 238.037,
440
+ "eval_steps_per_second": 8.735,
441
+ "step": 248
442
+ },
443
+ {
444
+ "epoch": 32.26,
445
+ "learning_rate": 5.9523809523809525e-06,
446
+ "loss": 0.7322,
447
+ "step": 250
448
+ },
449
+ {
450
+ "epoch": 32.9,
451
+ "eval_accuracy": 0.7431192660550459,
452
+ "eval_loss": 0.7012345194816589,
453
+ "eval_runtime": 0.4583,
454
+ "eval_samples_per_second": 237.855,
455
+ "eval_steps_per_second": 8.729,
456
+ "step": 255
457
+ },
458
+ {
459
+ "epoch": 33.55,
460
+ "learning_rate": 3.968253968253968e-06,
461
+ "loss": 0.7124,
462
+ "step": 260
463
+ },
464
+ {
465
+ "epoch": 33.94,
466
+ "eval_accuracy": 0.7522935779816514,
467
+ "eval_loss": 0.7052269577980042,
468
+ "eval_runtime": 0.4302,
469
+ "eval_samples_per_second": 253.362,
470
+ "eval_steps_per_second": 9.298,
471
+ "step": 263
472
+ },
473
+ {
474
+ "epoch": 34.84,
475
+ "learning_rate": 1.984126984126984e-06,
476
+ "loss": 0.7194,
477
+ "step": 270
478
+ },
479
+ {
480
+ "epoch": 34.97,
481
+ "eval_accuracy": 0.7614678899082569,
482
+ "eval_loss": 0.6964432597160339,
483
+ "eval_runtime": 0.6076,
484
+ "eval_samples_per_second": 179.38,
485
+ "eval_steps_per_second": 6.583,
486
+ "step": 271
487
+ },
488
+ {
489
+ "epoch": 36.0,
490
+ "eval_accuracy": 0.7522935779816514,
491
+ "eval_loss": 0.7007263898849487,
492
+ "eval_runtime": 0.4321,
493
+ "eval_samples_per_second": 252.256,
494
+ "eval_steps_per_second": 9.257,
495
+ "step": 279
496
+ },
497
+ {
498
+ "epoch": 36.13,
499
+ "learning_rate": 0.0,
500
+ "loss": 0.6903,
501
+ "step": 280
502
+ },
503
+ {
504
+ "epoch": 36.13,
505
+ "eval_accuracy": 0.7431192660550459,
506
+ "eval_loss": 0.6986872553825378,
507
+ "eval_runtime": 0.4293,
508
+ "eval_samples_per_second": 253.895,
509
+ "eval_steps_per_second": 9.317,
510
+ "step": 280
511
+ },
512
+ {
513
+ "epoch": 36.13,
514
+ "step": 280,
515
+ "total_flos": 9.686412043576934e+16,
516
+ "train_loss": 0.9347471083913531,
517
+ "train_runtime": 250.0799,
518
+ "train_samples_per_second": 156.43,
519
+ "train_steps_per_second": 1.12
520
  }
521
  ],
522
  "logging_steps": 10,
523
+ "max_steps": 280,
524
+ "num_train_epochs": 40,
525
  "save_steps": 500,
526
+ "total_flos": 9.686412043576934e+16,
527
  "trial_name": null,
528
  "trial_params": null
529
  }