ellabettison committed on
Commit
0800564
·
verified ·
1 Parent(s): 17b2f04

🍻 cheers

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  library_name: transformers
3
  base_model: openai/clip-vit-base-patch32
4
  tags:
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -15,10 +16,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # logo-matching-base
17
 
18
- This model is a fine-tuned version of [openai/clip-vit-base-patch32](https://huggingface.co/openai/clip-vit-base-patch32) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0712
21
- - Accuracy: 0.1743
22
 
23
  ## Model description
24
 
 
2
  library_name: transformers
3
  base_model: openai/clip-vit-base-patch32
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
 
16
 
17
  # logo-matching-base
18
 
19
+ This model is a fine-tuned version of [openai/clip-vit-base-patch32](https://huggingface.co/openai/clip-vit-base-patch32) on the ellabettison/logo-matching dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.0550
22
+ - Accuracy: 0.2770
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.5787671232876712,
4
- "eval_loss": 0.01622641272842884,
5
- "eval_runtime": 3.9702,
6
- "eval_samples_per_second": 73.548,
7
- "eval_steps_per_second": 9.319,
8
  "total_flos": 5.916629591779738e+17,
9
- "train_loss": 0.020844636825805014,
10
- "train_runtime": 401.6875,
11
- "train_samples_per_second": 18.621,
12
- "train_steps_per_second": 1.17
13
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.27695167286245354,
4
+ "eval_loss": 0.055018216371536255,
5
+ "eval_runtime": 5.4841,
6
+ "eval_samples_per_second": 98.101,
7
+ "eval_steps_per_second": 12.399,
8
  "total_flos": 5.916629591779738e+17,
9
+ "train_loss": 0.006165030080468413,
10
+ "train_runtime": 454.1808,
11
+ "train_samples_per_second": 16.469,
12
+ "train_steps_per_second": 1.035
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.5787671232876712,
4
- "eval_loss": 0.01622641272842884,
5
- "eval_runtime": 3.9702,
6
- "eval_samples_per_second": 73.548,
7
- "eval_steps_per_second": 9.319
8
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.27695167286245354,
4
+ "eval_loss": 0.055018216371536255,
5
+ "eval_runtime": 5.4841,
6
+ "eval_samples_per_second": 98.101,
7
+ "eval_steps_per_second": 12.399
8
  }
runs/Jan15_20-14-36_a99b99d67614/events.out.tfevents.1736972553.a99b99d67614.505.33 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:945e5b7c5cac1850ce8a7168d6495b47286e3f606e9be7496602b5395f01b5e3
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 10.0,
3
  "total_flos": 5.916629591779738e+17,
4
- "train_loss": 0.020844636825805014,
5
- "train_runtime": 401.6875,
6
- "train_samples_per_second": 18.621,
7
- "train_steps_per_second": 1.17
8
  }
 
1
  {
2
  "epoch": 10.0,
3
  "total_flos": 5.916629591779738e+17,
4
+ "train_loss": 0.006165030080468413,
5
+ "train_runtime": 454.1808,
6
+ "train_samples_per_second": 16.469,
7
+ "train_steps_per_second": 1.035
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.03154107183218002,
3
- "best_model_checkpoint": "./logo-matching-base/checkpoint-141",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
6
  "global_step": 470,
@@ -10,431 +10,431 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.2127659574468085,
13
- "grad_norm": 0.20348502695560455,
14
  "learning_rate": 0.00019574468085106384,
15
- "loss": 0.2136,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.425531914893617,
20
- "grad_norm": 0.20952267944812775,
21
  "learning_rate": 0.00019148936170212768,
22
- "loss": 0.0284,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.6382978723404256,
27
- "grad_norm": 0.21442832052707672,
28
  "learning_rate": 0.0001872340425531915,
29
- "loss": 0.183,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.851063829787234,
34
- "grad_norm": 0.38694852590560913,
35
  "learning_rate": 0.00018297872340425532,
36
- "loss": 0.0225,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 1.0,
41
- "eval_accuracy": 0.3229357798165138,
42
- "eval_loss": 0.032035112380981445,
43
- "eval_runtime": 6.0654,
44
- "eval_samples_per_second": 89.854,
45
- "eval_steps_per_second": 11.376,
46
  "step": 47
47
  },
48
  {
49
  "epoch": 1.0638297872340425,
50
- "grad_norm": 0.11812812834978104,
51
  "learning_rate": 0.00017872340425531915,
52
- "loss": 0.02,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.2765957446808511,
57
- "grad_norm": 0.1575324535369873,
58
  "learning_rate": 0.00017446808510638298,
59
- "loss": 0.0213,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.4893617021276595,
64
- "grad_norm": 0.12748625874519348,
65
  "learning_rate": 0.00017021276595744682,
66
- "loss": 0.0207,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.702127659574468,
71
- "grad_norm": 0.11703016608953476,
72
  "learning_rate": 0.00016595744680851065,
73
- "loss": 0.0178,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.9148936170212765,
78
- "grad_norm": 0.09279447048902512,
79
  "learning_rate": 0.00016170212765957446,
80
- "loss": 0.0184,
81
  "step": 90
82
  },
83
  {
84
  "epoch": 2.0,
85
- "eval_accuracy": 0.27706422018348625,
86
- "eval_loss": 0.03198189660906792,
87
- "eval_runtime": 4.7267,
88
- "eval_samples_per_second": 115.302,
89
- "eval_steps_per_second": 14.598,
90
  "step": 94
91
  },
92
  {
93
  "epoch": 2.127659574468085,
94
- "grad_norm": 0.09347425401210785,
95
  "learning_rate": 0.00015744680851063832,
96
- "loss": 0.0161,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.3404255319148937,
101
- "grad_norm": 0.09166835993528366,
102
  "learning_rate": 0.00015319148936170213,
103
- "loss": 0.0169,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.5531914893617023,
108
- "grad_norm": 0.12563753128051758,
109
  "learning_rate": 0.00014893617021276596,
110
- "loss": 0.0181,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 2.7659574468085104,
115
- "grad_norm": 0.10614734888076782,
116
  "learning_rate": 0.0001446808510638298,
117
- "loss": 0.0166,
118
  "step": 130
119
  },
120
  {
121
  "epoch": 2.978723404255319,
122
- "grad_norm": 0.10630539804697037,
123
  "learning_rate": 0.00014042553191489363,
124
- "loss": 0.0177,
125
  "step": 140
126
  },
127
  {
128
  "epoch": 3.0,
129
- "eval_accuracy": 0.26788990825688075,
130
- "eval_loss": 0.03154107183218002,
131
- "eval_runtime": 6.1724,
132
- "eval_samples_per_second": 88.296,
133
- "eval_steps_per_second": 11.179,
134
  "step": 141
135
  },
136
  {
137
  "epoch": 3.1914893617021276,
138
- "grad_norm": 0.08008704334497452,
139
  "learning_rate": 0.00013617021276595746,
140
- "loss": 0.0175,
141
  "step": 150
142
  },
143
  {
144
  "epoch": 3.404255319148936,
145
- "grad_norm": 0.1296830177307129,
146
  "learning_rate": 0.00013191489361702127,
147
- "loss": 0.0174,
148
  "step": 160
149
  },
150
  {
151
  "epoch": 3.617021276595745,
152
- "grad_norm": 0.11523136496543884,
153
  "learning_rate": 0.00012765957446808513,
154
- "loss": 0.014,
155
  "step": 170
156
  },
157
  {
158
  "epoch": 3.829787234042553,
159
- "grad_norm": 0.10043615847826004,
160
  "learning_rate": 0.00012340425531914893,
161
- "loss": 0.0144,
162
  "step": 180
163
  },
164
  {
165
  "epoch": 4.0,
166
- "eval_accuracy": 0.25321100917431194,
167
- "eval_loss": 0.03288332372903824,
168
- "eval_runtime": 5.7194,
169
- "eval_samples_per_second": 95.289,
170
- "eval_steps_per_second": 12.064,
171
  "step": 188
172
  },
173
  {
174
  "epoch": 4.042553191489362,
175
- "grad_norm": 0.1341152787208557,
176
  "learning_rate": 0.00011914893617021277,
177
- "loss": 0.0143,
178
  "step": 190
179
  },
180
  {
181
  "epoch": 4.25531914893617,
182
- "grad_norm": 0.11163297295570374,
183
  "learning_rate": 0.00011489361702127661,
184
- "loss": 0.0147,
185
  "step": 200
186
  },
187
  {
188
  "epoch": 4.468085106382979,
189
- "grad_norm": 0.14031550288200378,
190
  "learning_rate": 0.00011063829787234043,
191
- "loss": 0.0129,
192
  "step": 210
193
  },
194
  {
195
  "epoch": 4.680851063829787,
196
- "grad_norm": 0.11095025390386581,
197
  "learning_rate": 0.00010638297872340425,
198
- "loss": 0.0159,
199
  "step": 220
200
  },
201
  {
202
  "epoch": 4.8936170212765955,
203
- "grad_norm": 0.09944932162761688,
204
  "learning_rate": 0.00010212765957446809,
205
- "loss": 0.0133,
206
  "step": 230
207
  },
208
  {
209
  "epoch": 5.0,
210
- "eval_accuracy": 0.30825688073394497,
211
- "eval_loss": 0.03211478143930435,
212
- "eval_runtime": 4.8718,
213
- "eval_samples_per_second": 111.869,
214
- "eval_steps_per_second": 14.163,
215
  "step": 235
216
  },
217
  {
218
  "epoch": 5.1063829787234045,
219
- "grad_norm": 0.07057506591081619,
220
  "learning_rate": 9.787234042553192e-05,
221
- "loss": 0.0118,
222
  "step": 240
223
  },
224
  {
225
  "epoch": 5.319148936170213,
226
- "grad_norm": 0.11148671805858612,
227
  "learning_rate": 9.361702127659576e-05,
228
- "loss": 0.0123,
229
  "step": 250
230
  },
231
  {
232
  "epoch": 5.531914893617021,
233
- "grad_norm": 0.1193113625049591,
234
  "learning_rate": 8.936170212765958e-05,
235
- "loss": 0.0128,
236
  "step": 260
237
  },
238
  {
239
  "epoch": 5.74468085106383,
240
- "grad_norm": 0.15620863437652588,
241
  "learning_rate": 8.510638297872341e-05,
242
- "loss": 0.013,
243
  "step": 270
244
  },
245
  {
246
  "epoch": 5.957446808510638,
247
- "grad_norm": 0.09126376360654831,
248
  "learning_rate": 8.085106382978723e-05,
249
- "loss": 0.0108,
250
  "step": 280
251
  },
252
  {
253
  "epoch": 6.0,
254
- "eval_accuracy": 0.3137614678899083,
255
- "eval_loss": 0.03295579180121422,
256
- "eval_runtime": 4.8988,
257
- "eval_samples_per_second": 111.251,
258
- "eval_steps_per_second": 14.085,
259
  "step": 282
260
  },
261
  {
262
  "epoch": 6.170212765957447,
263
- "grad_norm": 0.11392467468976974,
264
  "learning_rate": 7.659574468085106e-05,
265
- "loss": 0.0115,
266
  "step": 290
267
  },
268
  {
269
  "epoch": 6.382978723404255,
270
- "grad_norm": 0.07867500931024551,
271
  "learning_rate": 7.23404255319149e-05,
272
- "loss": 0.0105,
273
  "step": 300
274
  },
275
  {
276
  "epoch": 6.595744680851064,
277
- "grad_norm": 0.07005509734153748,
278
  "learning_rate": 6.808510638297873e-05,
279
- "loss": 0.0099,
280
  "step": 310
281
  },
282
  {
283
  "epoch": 6.808510638297872,
284
- "grad_norm": 0.10206523537635803,
285
  "learning_rate": 6.382978723404256e-05,
286
- "loss": 0.0097,
287
  "step": 320
288
  },
289
  {
290
  "epoch": 7.0,
291
- "eval_accuracy": 0.25688073394495414,
292
- "eval_loss": 0.034237515181303024,
293
- "eval_runtime": 5.8682,
294
- "eval_samples_per_second": 92.873,
295
- "eval_steps_per_second": 11.758,
296
  "step": 329
297
  },
298
  {
299
  "epoch": 7.0212765957446805,
300
- "grad_norm": 0.15130436420440674,
301
  "learning_rate": 5.9574468085106384e-05,
302
- "loss": 0.0106,
303
  "step": 330
304
  },
305
  {
306
  "epoch": 7.23404255319149,
307
- "grad_norm": 0.07350896298885345,
308
  "learning_rate": 5.531914893617022e-05,
309
- "loss": 0.0086,
310
  "step": 340
311
  },
312
  {
313
  "epoch": 7.446808510638298,
314
- "grad_norm": 0.10417384654283524,
315
  "learning_rate": 5.1063829787234044e-05,
316
- "loss": 0.0108,
317
  "step": 350
318
  },
319
  {
320
  "epoch": 7.659574468085106,
321
- "grad_norm": 0.12092699855566025,
322
  "learning_rate": 4.680851063829788e-05,
323
- "loss": 0.0079,
324
  "step": 360
325
  },
326
  {
327
  "epoch": 7.872340425531915,
328
- "grad_norm": 0.11693856120109558,
329
  "learning_rate": 4.2553191489361704e-05,
330
- "loss": 0.0086,
331
  "step": 370
332
  },
333
  {
334
  "epoch": 8.0,
335
- "eval_accuracy": 0.3192660550458716,
336
- "eval_loss": 0.03412623330950737,
337
- "eval_runtime": 6.8764,
338
- "eval_samples_per_second": 79.257,
339
- "eval_steps_per_second": 10.034,
340
  "step": 376
341
  },
342
  {
343
  "epoch": 8.085106382978724,
344
- "grad_norm": 0.07324172556400299,
345
  "learning_rate": 3.829787234042553e-05,
346
- "loss": 0.0076,
347
  "step": 380
348
  },
349
  {
350
  "epoch": 8.297872340425531,
351
- "grad_norm": 0.1128627359867096,
352
  "learning_rate": 3.4042553191489365e-05,
353
- "loss": 0.0074,
354
  "step": 390
355
  },
356
  {
357
  "epoch": 8.51063829787234,
358
- "grad_norm": 0.10660151392221451,
359
  "learning_rate": 2.9787234042553192e-05,
360
- "loss": 0.0067,
361
  "step": 400
362
  },
363
  {
364
  "epoch": 8.72340425531915,
365
- "grad_norm": 0.0888807401061058,
366
  "learning_rate": 2.5531914893617022e-05,
367
- "loss": 0.0076,
368
  "step": 410
369
  },
370
  {
371
  "epoch": 8.936170212765958,
372
- "grad_norm": 0.07239257544279099,
373
  "learning_rate": 2.1276595744680852e-05,
374
- "loss": 0.0068,
375
  "step": 420
376
  },
377
  {
378
  "epoch": 9.0,
379
- "eval_accuracy": 0.3155963302752294,
380
- "eval_loss": 0.03542930632829666,
381
- "eval_runtime": 5.9234,
382
- "eval_samples_per_second": 92.008,
383
- "eval_steps_per_second": 11.649,
384
  "step": 423
385
  },
386
  {
387
  "epoch": 9.148936170212766,
388
- "grad_norm": 0.08497753739356995,
389
  "learning_rate": 1.7021276595744682e-05,
390
- "loss": 0.0067,
391
  "step": 430
392
  },
393
  {
394
  "epoch": 9.361702127659575,
395
- "grad_norm": 0.06389721482992172,
396
  "learning_rate": 1.2765957446808511e-05,
397
- "loss": 0.0062,
398
  "step": 440
399
  },
400
  {
401
  "epoch": 9.574468085106384,
402
- "grad_norm": 0.06799926608800888,
403
  "learning_rate": 8.510638297872341e-06,
404
- "loss": 0.0054,
405
  "step": 450
406
  },
407
  {
408
  "epoch": 9.787234042553191,
409
- "grad_norm": 0.08153792470693588,
410
  "learning_rate": 4.255319148936171e-06,
411
- "loss": 0.0053,
412
  "step": 460
413
  },
414
  {
415
  "epoch": 10.0,
416
- "grad_norm": 0.1752246469259262,
417
  "learning_rate": 0.0,
418
- "loss": 0.0056,
419
  "step": 470
420
  },
421
  {
422
  "epoch": 10.0,
423
- "eval_accuracy": 0.3339449541284404,
424
- "eval_loss": 0.03554193675518036,
425
- "eval_runtime": 6.6038,
426
- "eval_samples_per_second": 82.529,
427
- "eval_steps_per_second": 10.449,
428
  "step": 470
429
  },
430
  {
431
  "epoch": 10.0,
432
  "step": 470,
433
  "total_flos": 5.916629591779738e+17,
434
- "train_loss": 0.020844636825805014,
435
- "train_runtime": 401.6875,
436
- "train_samples_per_second": 18.621,
437
- "train_steps_per_second": 1.17
438
  }
439
  ],
440
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.04811817407608032,
3
+ "best_model_checkpoint": "./logo-matching-base/checkpoint-47",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
6
  "global_step": 470,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.2127659574468085,
13
+ "grad_norm": 0.07344582676887512,
14
  "learning_rate": 0.00019574468085106384,
15
+ "loss": 0.0184,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.425531914893617,
20
+ "grad_norm": 0.07934936881065369,
21
  "learning_rate": 0.00019148936170212768,
22
+ "loss": 0.0158,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.6382978723404256,
27
+ "grad_norm": 0.06698207557201385,
28
  "learning_rate": 0.0001872340425531915,
29
+ "loss": 0.0147,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.851063829787234,
34
+ "grad_norm": 0.08733490109443665,
35
  "learning_rate": 0.00018297872340425532,
36
+ "loss": 0.014,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 1.0,
41
+ "eval_accuracy": 0.28807339449541286,
42
+ "eval_loss": 0.04811817407608032,
43
+ "eval_runtime": 6.3598,
44
+ "eval_samples_per_second": 85.695,
45
+ "eval_steps_per_second": 10.849,
46
  "step": 47
47
  },
48
  {
49
  "epoch": 1.0638297872340425,
50
+ "grad_norm": 0.062165793031454086,
51
  "learning_rate": 0.00017872340425531915,
52
+ "loss": 0.0126,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.2765957446808511,
57
+ "grad_norm": 0.07251156866550446,
58
  "learning_rate": 0.00017446808510638298,
59
+ "loss": 0.0128,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.4893617021276595,
64
+ "grad_norm": 0.0481877438724041,
65
  "learning_rate": 0.00017021276595744682,
66
+ "loss": 0.0126,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.702127659574468,
71
+ "grad_norm": 0.07360873371362686,
72
  "learning_rate": 0.00016595744680851065,
73
+ "loss": 0.0121,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.9148936170212765,
78
+ "grad_norm": 0.058246735483407974,
79
  "learning_rate": 0.00016170212765957446,
80
+ "loss": 0.0127,
81
  "step": 90
82
  },
83
  {
84
  "epoch": 2.0,
85
+ "eval_accuracy": 0.12844036697247707,
86
+ "eval_loss": 0.054430264979600906,
87
+ "eval_runtime": 6.5223,
88
+ "eval_samples_per_second": 83.559,
89
+ "eval_steps_per_second": 10.579,
90
  "step": 94
91
  },
92
  {
93
  "epoch": 2.127659574468085,
94
+ "grad_norm": 0.06463072448968887,
95
  "learning_rate": 0.00015744680851063832,
96
+ "loss": 0.0113,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.3404255319148937,
101
+ "grad_norm": 0.055768080055713654,
102
  "learning_rate": 0.00015319148936170213,
103
+ "loss": 0.0099,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.5531914893617023,
108
+ "grad_norm": 0.08255070447921753,
109
  "learning_rate": 0.00014893617021276596,
110
+ "loss": 0.0105,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 2.7659574468085104,
115
+ "grad_norm": 0.07059154659509659,
116
  "learning_rate": 0.0001446808510638298,
117
+ "loss": 0.0092,
118
  "step": 130
119
  },
120
  {
121
  "epoch": 2.978723404255319,
122
+ "grad_norm": 0.0670301541686058,
123
  "learning_rate": 0.00014042553191489363,
124
+ "loss": 0.0097,
125
  "step": 140
126
  },
127
  {
128
  "epoch": 3.0,
129
+ "eval_accuracy": 0.13394495412844037,
130
+ "eval_loss": 0.056532666087150574,
131
+ "eval_runtime": 6.5935,
132
+ "eval_samples_per_second": 82.657,
133
+ "eval_steps_per_second": 10.465,
134
  "step": 141
135
  },
136
  {
137
  "epoch": 3.1914893617021276,
138
+ "grad_norm": 0.05259150639176369,
139
  "learning_rate": 0.00013617021276595746,
140
+ "loss": 0.0094,
141
  "step": 150
142
  },
143
  {
144
  "epoch": 3.404255319148936,
145
+ "grad_norm": 0.08173543959856033,
146
  "learning_rate": 0.00013191489361702127,
147
+ "loss": 0.0096,
148
  "step": 160
149
  },
150
  {
151
  "epoch": 3.617021276595745,
152
+ "grad_norm": 0.04590131714940071,
153
  "learning_rate": 0.00012765957446808513,
154
+ "loss": 0.0071,
155
  "step": 170
156
  },
157
  {
158
  "epoch": 3.829787234042553,
159
+ "grad_norm": 0.03957865759730339,
160
  "learning_rate": 0.00012340425531914893,
161
+ "loss": 0.0062,
162
  "step": 180
163
  },
164
  {
165
  "epoch": 4.0,
166
+ "eval_accuracy": 0.1669724770642202,
167
+ "eval_loss": 0.0624094121158123,
168
+ "eval_runtime": 6.6046,
169
+ "eval_samples_per_second": 82.518,
170
+ "eval_steps_per_second": 10.447,
171
  "step": 188
172
  },
173
  {
174
  "epoch": 4.042553191489362,
175
+ "grad_norm": 0.07129650563001633,
176
  "learning_rate": 0.00011914893617021277,
177
+ "loss": 0.0056,
178
  "step": 190
179
  },
180
  {
181
  "epoch": 4.25531914893617,
182
+ "grad_norm": 0.056660715490579605,
183
  "learning_rate": 0.00011489361702127661,
184
+ "loss": 0.0061,
185
  "step": 200
186
  },
187
  {
188
  "epoch": 4.468085106382979,
189
+ "grad_norm": 0.05685529112815857,
190
  "learning_rate": 0.00011063829787234043,
191
+ "loss": 0.005,
192
  "step": 210
193
  },
194
  {
195
  "epoch": 4.680851063829787,
196
+ "grad_norm": 0.03731105104088783,
197
  "learning_rate": 0.00010638297872340425,
198
+ "loss": 0.006,
199
  "step": 220
200
  },
201
  {
202
  "epoch": 4.8936170212765955,
203
+ "grad_norm": 0.039578877389431,
204
  "learning_rate": 0.00010212765957446809,
205
+ "loss": 0.0051,
206
  "step": 230
207
  },
208
  {
209
  "epoch": 5.0,
210
+ "eval_accuracy": 0.23669724770642203,
211
+ "eval_loss": 0.059408094733953476,
212
+ "eval_runtime": 6.822,
213
+ "eval_samples_per_second": 79.889,
214
+ "eval_steps_per_second": 10.114,
215
  "step": 235
216
  },
217
  {
218
  "epoch": 5.1063829787234045,
219
+ "grad_norm": 0.05410230532288551,
220
  "learning_rate": 9.787234042553192e-05,
221
+ "loss": 0.0044,
222
  "step": 240
223
  },
224
  {
225
  "epoch": 5.319148936170213,
226
+ "grad_norm": 0.03862292692065239,
227
  "learning_rate": 9.361702127659576e-05,
228
+ "loss": 0.0037,
229
  "step": 250
230
  },
231
  {
232
  "epoch": 5.531914893617021,
233
+ "grad_norm": 0.06473053991794586,
234
  "learning_rate": 8.936170212765958e-05,
235
+ "loss": 0.0046,
236
  "step": 260
237
  },
238
  {
239
  "epoch": 5.74468085106383,
240
+ "grad_norm": 0.03953048214316368,
241
  "learning_rate": 8.510638297872341e-05,
242
+ "loss": 0.0039,
243
  "step": 270
244
  },
245
  {
246
  "epoch": 5.957446808510638,
247
+ "grad_norm": 0.050171270966529846,
248
  "learning_rate": 8.085106382978723e-05,
249
+ "loss": 0.0037,
250
  "step": 280
251
  },
252
  {
253
  "epoch": 6.0,
254
+ "eval_accuracy": 0.20917431192660552,
255
+ "eval_loss": 0.06461313366889954,
256
+ "eval_runtime": 5.4957,
257
+ "eval_samples_per_second": 99.169,
258
+ "eval_steps_per_second": 12.555,
259
  "step": 282
260
  },
261
  {
262
  "epoch": 6.170212765957447,
263
+ "grad_norm": 0.05051916465163231,
264
  "learning_rate": 7.659574468085106e-05,
265
+ "loss": 0.0035,
266
  "step": 290
267
  },
268
  {
269
  "epoch": 6.382978723404255,
270
+ "grad_norm": 0.03177861496806145,
271
  "learning_rate": 7.23404255319149e-05,
272
+ "loss": 0.0029,
273
  "step": 300
274
  },
275
  {
276
  "epoch": 6.595744680851064,
277
+ "grad_norm": 0.01716785505414009,
278
  "learning_rate": 6.808510638297873e-05,
279
+ "loss": 0.0026,
280
  "step": 310
281
  },
282
  {
283
  "epoch": 6.808510638297872,
284
+ "grad_norm": 0.02455182373523712,
285
  "learning_rate": 6.382978723404256e-05,
286
+ "loss": 0.0023,
287
  "step": 320
288
  },
289
  {
290
  "epoch": 7.0,
291
+ "eval_accuracy": 0.1981651376146789,
292
+ "eval_loss": 0.06650757044553757,
293
+ "eval_runtime": 6.6722,
294
+ "eval_samples_per_second": 81.683,
295
+ "eval_steps_per_second": 10.341,
296
  "step": 329
297
  },
298
  {
299
  "epoch": 7.0212765957446805,
300
+ "grad_norm": 0.06645756959915161,
301
  "learning_rate": 5.9574468085106384e-05,
302
+ "loss": 0.0023,
303
  "step": 330
304
  },
305
  {
306
  "epoch": 7.23404255319149,
307
+ "grad_norm": 0.036258358508348465,
308
  "learning_rate": 5.531914893617022e-05,
309
+ "loss": 0.0022,
310
  "step": 340
311
  },
312
  {
313
  "epoch": 7.446808510638298,
314
+ "grad_norm": 0.055592458695173264,
315
  "learning_rate": 5.1063829787234044e-05,
316
+ "loss": 0.0027,
317
  "step": 350
318
  },
319
  {
320
  "epoch": 7.659574468085106,
321
+ "grad_norm": 0.02403583563864231,
322
  "learning_rate": 4.680851063829788e-05,
323
+ "loss": 0.0015,
324
  "step": 360
325
  },
326
  {
327
  "epoch": 7.872340425531915,
328
+ "grad_norm": 0.01888449862599373,
329
  "learning_rate": 4.2553191489361704e-05,
330
+ "loss": 0.0015,
331
  "step": 370
332
  },
333
  {
334
  "epoch": 8.0,
335
+ "eval_accuracy": 0.1596330275229358,
336
+ "eval_loss": 0.06884702295064926,
337
+ "eval_runtime": 6.3823,
338
+ "eval_samples_per_second": 85.393,
339
+ "eval_steps_per_second": 10.811,
340
  "step": 376
341
  },
342
  {
343
  "epoch": 8.085106382978724,
344
+ "grad_norm": 0.012278878130018711,
345
  "learning_rate": 3.829787234042553e-05,
346
+ "loss": 0.0015,
347
  "step": 380
348
  },
349
  {
350
  "epoch": 8.297872340425531,
351
+ "grad_norm": 0.029290180653333664,
352
  "learning_rate": 3.4042553191489365e-05,
353
+ "loss": 0.0013,
354
  "step": 390
355
  },
356
  {
357
  "epoch": 8.51063829787234,
358
+ "grad_norm": 0.01336819026619196,
359
  "learning_rate": 2.9787234042553192e-05,
360
+ "loss": 0.0012,
361
  "step": 400
362
  },
363
  {
364
  "epoch": 8.72340425531915,
365
+ "grad_norm": 0.01985483057796955,
366
  "learning_rate": 2.5531914893617022e-05,
367
+ "loss": 0.0013,
368
  "step": 410
369
  },
370
  {
371
  "epoch": 8.936170212765958,
372
+ "grad_norm": 0.026989364996552467,
373
  "learning_rate": 2.1276595744680852e-05,
374
+ "loss": 0.0013,
375
  "step": 420
376
  },
377
  {
378
  "epoch": 9.0,
379
+ "eval_accuracy": 0.181651376146789,
380
+ "eval_loss": 0.0706261619925499,
381
+ "eval_runtime": 6.7059,
382
+ "eval_samples_per_second": 81.272,
383
+ "eval_steps_per_second": 10.29,
384
  "step": 423
385
  },
386
  {
387
  "epoch": 9.148936170212766,
388
+ "grad_norm": 0.02496664598584175,
389
  "learning_rate": 1.7021276595744682e-05,
390
+ "loss": 0.0014,
391
  "step": 430
392
  },
393
  {
394
  "epoch": 9.361702127659575,
395
+ "grad_norm": 0.01364427525550127,
396
  "learning_rate": 1.2765957446808511e-05,
397
+ "loss": 0.0009,
398
  "step": 440
399
  },
400
  {
401
  "epoch": 9.574468085106384,
402
+ "grad_norm": 0.010134860873222351,
403
  "learning_rate": 8.510638297872341e-06,
404
+ "loss": 0.0009,
405
  "step": 450
406
  },
407
  {
408
  "epoch": 9.787234042553191,
409
+ "grad_norm": 0.03484776243567467,
410
  "learning_rate": 4.255319148936171e-06,
411
+ "loss": 0.0011,
412
  "step": 460
413
  },
414
  {
415
  "epoch": 10.0,
416
+ "grad_norm": 0.0072807134129107,
417
  "learning_rate": 0.0,
418
+ "loss": 0.0007,
419
  "step": 470
420
  },
421
  {
422
  "epoch": 10.0,
423
+ "eval_accuracy": 0.1743119266055046,
424
+ "eval_loss": 0.07116351276636124,
425
+ "eval_runtime": 6.8862,
426
+ "eval_samples_per_second": 79.144,
427
+ "eval_steps_per_second": 10.02,
428
  "step": 470
429
  },
430
  {
431
  "epoch": 10.0,
432
  "step": 470,
433
  "total_flos": 5.916629591779738e+17,
434
+ "train_loss": 0.006165030080468413,
435
+ "train_runtime": 454.1808,
436
+ "train_samples_per_second": 16.469,
437
+ "train_steps_per_second": 1.035
438
  }
439
  ],
440
  "logging_steps": 10,