Fu-chiang commited on
Commit
f211ef8
1 Parent(s): ad544d2

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bit-50",
3
+ "architectures": [
4
+ "BitForImageClassification"
5
+ ],
6
+ "conv_layer": "std_conv",
7
+ "depths": [
8
+ 3,
9
+ 4,
10
+ 6,
11
+ 3
12
+ ],
13
+ "drop_path_rate": 0.0,
14
+ "embedding_dynamic_padding": false,
15
+ "embedding_size": 64,
16
+ "global_padding": null,
17
+ "hidden_act": "relu",
18
+ "hidden_sizes": [
19
+ 256,
20
+ 512,
21
+ 1024,
22
+ 2048
23
+ ],
24
+ "id2label": {
25
+ "0": "melanoma",
26
+ "1": "nevus",
27
+ "2": "seborrheic_keratosis"
28
+ },
29
+ "label2id": {
30
+ "melanoma": 0,
31
+ "nevus": 1,
32
+ "seborrheic_keratosis": 2
33
+ },
34
+ "layer_type": "preactivation",
35
+ "model_type": "bit",
36
+ "num_channels": 3,
37
+ "num_groups": 32,
38
+ "out_features": [
39
+ "stage4"
40
+ ],
41
+ "out_indices": [
42
+ 4
43
+ ],
44
+ "output_stride": 32,
45
+ "problem_type": "single_label_classification",
46
+ "stage_names": [
47
+ "stem",
48
+ "stage1",
49
+ "stage2",
50
+ "stage3",
51
+ "stage4"
52
+ ],
53
+ "torch_dtype": "float32",
54
+ "transformers_version": "4.31.0",
55
+ "width_factor": 1
56
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e5972aef562932591283eca95a4ffbe11204bd97d515cdb2cdf547073d77e63
3
+ size 188142469
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 448,
4
+ "width": 448
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "image_processor_type": "BitImageProcessor",
17
+ "image_std": [
18
+ 0.5,
19
+ 0.5,
20
+ 0.5
21
+ ],
22
+ "resample": 2,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 448
26
+ }
27
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ba1fe789de85b5656a87c2a49163ab836851947b0b51669182448afc44b8076
3
+ size 94077617
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d492226c0859c69b7d7954e558f40046f471ce1a604961375b17757a76021572
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:861929b6e2c3d66c3399c5946123c089831a1b0206860b461cd934bb6336b5cb
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,958 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8733333333333333,
3
+ "best_model_checkpoint": "bit-50-skin-lesions\\checkpoint-1062",
4
+ "epoch": 33.984,
5
+ "global_step": 1062,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.32,
12
+ "learning_rate": 3.225806451612903e-06,
13
+ "loss": 1.2002,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.64,
18
+ "learning_rate": 6.451612903225806e-06,
19
+ "loss": 0.9773,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.96,
24
+ "learning_rate": 9.67741935483871e-06,
25
+ "loss": 0.8102,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.99,
30
+ "eval_accuracy": 0.5333333333333333,
31
+ "eval_loss": 0.968501091003418,
32
+ "eval_runtime": 21.6336,
33
+ "eval_samples_per_second": 6.934,
34
+ "eval_steps_per_second": 0.462,
35
+ "step": 31
36
+ },
37
+ {
38
+ "epoch": 1.28,
39
+ "learning_rate": 1.2903225806451613e-05,
40
+ "loss": 0.699,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 1.6,
45
+ "learning_rate": 1.6129032258064517e-05,
46
+ "loss": 0.7167,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 1.92,
51
+ "learning_rate": 1.935483870967742e-05,
52
+ "loss": 0.5819,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 1.98,
57
+ "eval_accuracy": 0.74,
58
+ "eval_loss": 0.6370717287063599,
59
+ "eval_runtime": 20.2966,
60
+ "eval_samples_per_second": 7.39,
61
+ "eval_steps_per_second": 0.493,
62
+ "step": 62
63
+ },
64
+ {
65
+ "epoch": 2.24,
66
+ "learning_rate": 2.258064516129032e-05,
67
+ "loss": 0.5839,
68
+ "step": 70
69
+ },
70
+ {
71
+ "epoch": 2.56,
72
+ "learning_rate": 2.5806451612903226e-05,
73
+ "loss": 0.547,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.88,
78
+ "learning_rate": 2.9032258064516133e-05,
79
+ "loss": 0.5664,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 2.98,
84
+ "eval_accuracy": 0.7333333333333333,
85
+ "eval_loss": 0.6208174824714661,
86
+ "eval_runtime": 20.1891,
87
+ "eval_samples_per_second": 7.43,
88
+ "eval_steps_per_second": 0.495,
89
+ "step": 93
90
+ },
91
+ {
92
+ "epoch": 3.2,
93
+ "learning_rate": 3.2258064516129034e-05,
94
+ "loss": 0.5493,
95
+ "step": 100
96
+ },
97
+ {
98
+ "epoch": 3.52,
99
+ "learning_rate": 3.548387096774194e-05,
100
+ "loss": 0.5026,
101
+ "step": 110
102
+ },
103
+ {
104
+ "epoch": 3.84,
105
+ "learning_rate": 3.870967741935484e-05,
106
+ "loss": 0.552,
107
+ "step": 120
108
+ },
109
+ {
110
+ "epoch": 4.0,
111
+ "eval_accuracy": 0.7466666666666667,
112
+ "eval_loss": 0.6024346947669983,
113
+ "eval_runtime": 20.2094,
114
+ "eval_samples_per_second": 7.422,
115
+ "eval_steps_per_second": 0.495,
116
+ "step": 125
117
+ },
118
+ {
119
+ "epoch": 4.16,
120
+ "learning_rate": 4.1935483870967746e-05,
121
+ "loss": 0.4447,
122
+ "step": 130
123
+ },
124
+ {
125
+ "epoch": 4.48,
126
+ "learning_rate": 4.516129032258064e-05,
127
+ "loss": 0.5273,
128
+ "step": 140
129
+ },
130
+ {
131
+ "epoch": 4.8,
132
+ "learning_rate": 4.8387096774193554e-05,
133
+ "loss": 0.4998,
134
+ "step": 150
135
+ },
136
+ {
137
+ "epoch": 4.99,
138
+ "eval_accuracy": 0.76,
139
+ "eval_loss": 0.5782171487808228,
140
+ "eval_runtime": 19.9613,
141
+ "eval_samples_per_second": 7.515,
142
+ "eval_steps_per_second": 0.501,
143
+ "step": 156
144
+ },
145
+ {
146
+ "epoch": 5.12,
147
+ "learning_rate": 4.982078853046595e-05,
148
+ "loss": 0.5239,
149
+ "step": 160
150
+ },
151
+ {
152
+ "epoch": 5.44,
153
+ "learning_rate": 4.9462365591397855e-05,
154
+ "loss": 0.4935,
155
+ "step": 170
156
+ },
157
+ {
158
+ "epoch": 5.76,
159
+ "learning_rate": 4.910394265232976e-05,
160
+ "loss": 0.4417,
161
+ "step": 180
162
+ },
163
+ {
164
+ "epoch": 5.98,
165
+ "eval_accuracy": 0.78,
166
+ "eval_loss": 0.504960298538208,
167
+ "eval_runtime": 20.0953,
168
+ "eval_samples_per_second": 7.464,
169
+ "eval_steps_per_second": 0.498,
170
+ "step": 187
171
+ },
172
+ {
173
+ "epoch": 6.08,
174
+ "learning_rate": 4.874551971326165e-05,
175
+ "loss": 0.428,
176
+ "step": 190
177
+ },
178
+ {
179
+ "epoch": 6.4,
180
+ "learning_rate": 4.8387096774193554e-05,
181
+ "loss": 0.4105,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 6.72,
186
+ "learning_rate": 4.802867383512545e-05,
187
+ "loss": 0.4161,
188
+ "step": 210
189
+ },
190
+ {
191
+ "epoch": 6.98,
192
+ "eval_accuracy": 0.7666666666666667,
193
+ "eval_loss": 0.6188395619392395,
194
+ "eval_runtime": 20.1038,
195
+ "eval_samples_per_second": 7.461,
196
+ "eval_steps_per_second": 0.497,
197
+ "step": 218
198
+ },
199
+ {
200
+ "epoch": 7.04,
201
+ "learning_rate": 4.767025089605735e-05,
202
+ "loss": 0.4169,
203
+ "step": 220
204
+ },
205
+ {
206
+ "epoch": 7.36,
207
+ "learning_rate": 4.731182795698925e-05,
208
+ "loss": 0.306,
209
+ "step": 230
210
+ },
211
+ {
212
+ "epoch": 7.68,
213
+ "learning_rate": 4.695340501792115e-05,
214
+ "loss": 0.3994,
215
+ "step": 240
216
+ },
217
+ {
218
+ "epoch": 8.0,
219
+ "learning_rate": 4.659498207885305e-05,
220
+ "loss": 0.4077,
221
+ "step": 250
222
+ },
223
+ {
224
+ "epoch": 8.0,
225
+ "eval_accuracy": 0.8,
226
+ "eval_loss": 0.630321741104126,
227
+ "eval_runtime": 20.0628,
228
+ "eval_samples_per_second": 7.477,
229
+ "eval_steps_per_second": 0.498,
230
+ "step": 250
231
+ },
232
+ {
233
+ "epoch": 8.32,
234
+ "learning_rate": 4.6236559139784944e-05,
235
+ "loss": 0.3623,
236
+ "step": 260
237
+ },
238
+ {
239
+ "epoch": 8.64,
240
+ "learning_rate": 4.5878136200716846e-05,
241
+ "loss": 0.3911,
242
+ "step": 270
243
+ },
244
+ {
245
+ "epoch": 8.96,
246
+ "learning_rate": 4.551971326164875e-05,
247
+ "loss": 0.4163,
248
+ "step": 280
249
+ },
250
+ {
251
+ "epoch": 8.99,
252
+ "eval_accuracy": 0.7933333333333333,
253
+ "eval_loss": 0.548830509185791,
254
+ "eval_runtime": 20.0908,
255
+ "eval_samples_per_second": 7.466,
256
+ "eval_steps_per_second": 0.498,
257
+ "step": 281
258
+ },
259
+ {
260
+ "epoch": 9.28,
261
+ "learning_rate": 4.516129032258064e-05,
262
+ "loss": 0.319,
263
+ "step": 290
264
+ },
265
+ {
266
+ "epoch": 9.6,
267
+ "learning_rate": 4.4802867383512545e-05,
268
+ "loss": 0.332,
269
+ "step": 300
270
+ },
271
+ {
272
+ "epoch": 9.92,
273
+ "learning_rate": 4.4444444444444447e-05,
274
+ "loss": 0.367,
275
+ "step": 310
276
+ },
277
+ {
278
+ "epoch": 9.98,
279
+ "eval_accuracy": 0.8066666666666666,
280
+ "eval_loss": 0.5251256227493286,
281
+ "eval_runtime": 20.2039,
282
+ "eval_samples_per_second": 7.424,
283
+ "eval_steps_per_second": 0.495,
284
+ "step": 312
285
+ },
286
+ {
287
+ "epoch": 10.24,
288
+ "learning_rate": 4.408602150537635e-05,
289
+ "loss": 0.4215,
290
+ "step": 320
291
+ },
292
+ {
293
+ "epoch": 10.56,
294
+ "learning_rate": 4.372759856630825e-05,
295
+ "loss": 0.3571,
296
+ "step": 330
297
+ },
298
+ {
299
+ "epoch": 10.88,
300
+ "learning_rate": 4.3369175627240145e-05,
301
+ "loss": 0.3391,
302
+ "step": 340
303
+ },
304
+ {
305
+ "epoch": 10.98,
306
+ "eval_accuracy": 0.78,
307
+ "eval_loss": 0.5417120456695557,
308
+ "eval_runtime": 20.1843,
309
+ "eval_samples_per_second": 7.432,
310
+ "eval_steps_per_second": 0.495,
311
+ "step": 343
312
+ },
313
+ {
314
+ "epoch": 11.2,
315
+ "learning_rate": 4.301075268817205e-05,
316
+ "loss": 0.3091,
317
+ "step": 350
318
+ },
319
+ {
320
+ "epoch": 11.52,
321
+ "learning_rate": 4.265232974910394e-05,
322
+ "loss": 0.2611,
323
+ "step": 360
324
+ },
325
+ {
326
+ "epoch": 11.84,
327
+ "learning_rate": 4.2293906810035844e-05,
328
+ "loss": 0.3298,
329
+ "step": 370
330
+ },
331
+ {
332
+ "epoch": 12.0,
333
+ "eval_accuracy": 0.7933333333333333,
334
+ "eval_loss": 0.4787960946559906,
335
+ "eval_runtime": 20.0289,
336
+ "eval_samples_per_second": 7.489,
337
+ "eval_steps_per_second": 0.499,
338
+ "step": 375
339
+ },
340
+ {
341
+ "epoch": 12.16,
342
+ "learning_rate": 4.1935483870967746e-05,
343
+ "loss": 0.2416,
344
+ "step": 380
345
+ },
346
+ {
347
+ "epoch": 12.48,
348
+ "learning_rate": 4.157706093189964e-05,
349
+ "loss": 0.2494,
350
+ "step": 390
351
+ },
352
+ {
353
+ "epoch": 12.8,
354
+ "learning_rate": 4.121863799283154e-05,
355
+ "loss": 0.3133,
356
+ "step": 400
357
+ },
358
+ {
359
+ "epoch": 12.99,
360
+ "eval_accuracy": 0.7733333333333333,
361
+ "eval_loss": 0.6023210883140564,
362
+ "eval_runtime": 19.8848,
363
+ "eval_samples_per_second": 7.543,
364
+ "eval_steps_per_second": 0.503,
365
+ "step": 406
366
+ },
367
+ {
368
+ "epoch": 13.12,
369
+ "learning_rate": 4.0860215053763444e-05,
370
+ "loss": 0.2826,
371
+ "step": 410
372
+ },
373
+ {
374
+ "epoch": 13.44,
375
+ "learning_rate": 4.050179211469534e-05,
376
+ "loss": 0.2571,
377
+ "step": 420
378
+ },
379
+ {
380
+ "epoch": 13.76,
381
+ "learning_rate": 4.014336917562724e-05,
382
+ "loss": 0.2916,
383
+ "step": 430
384
+ },
385
+ {
386
+ "epoch": 13.98,
387
+ "eval_accuracy": 0.76,
388
+ "eval_loss": 0.5615163445472717,
389
+ "eval_runtime": 19.8565,
390
+ "eval_samples_per_second": 7.554,
391
+ "eval_steps_per_second": 0.504,
392
+ "step": 437
393
+ },
394
+ {
395
+ "epoch": 14.08,
396
+ "learning_rate": 3.978494623655914e-05,
397
+ "loss": 0.3033,
398
+ "step": 440
399
+ },
400
+ {
401
+ "epoch": 14.4,
402
+ "learning_rate": 3.9426523297491045e-05,
403
+ "loss": 0.2611,
404
+ "step": 450
405
+ },
406
+ {
407
+ "epoch": 14.72,
408
+ "learning_rate": 3.906810035842295e-05,
409
+ "loss": 0.2491,
410
+ "step": 460
411
+ },
412
+ {
413
+ "epoch": 14.98,
414
+ "eval_accuracy": 0.84,
415
+ "eval_loss": 0.4784340560436249,
416
+ "eval_runtime": 20.1708,
417
+ "eval_samples_per_second": 7.437,
418
+ "eval_steps_per_second": 0.496,
419
+ "step": 468
420
+ },
421
+ {
422
+ "epoch": 15.04,
423
+ "learning_rate": 3.870967741935484e-05,
424
+ "loss": 0.1961,
425
+ "step": 470
426
+ },
427
+ {
428
+ "epoch": 15.36,
429
+ "learning_rate": 3.8351254480286743e-05,
430
+ "loss": 0.2239,
431
+ "step": 480
432
+ },
433
+ {
434
+ "epoch": 15.68,
435
+ "learning_rate": 3.799283154121864e-05,
436
+ "loss": 0.2206,
437
+ "step": 490
438
+ },
439
+ {
440
+ "epoch": 16.0,
441
+ "learning_rate": 3.763440860215054e-05,
442
+ "loss": 0.2213,
443
+ "step": 500
444
+ },
445
+ {
446
+ "epoch": 16.0,
447
+ "eval_accuracy": 0.8,
448
+ "eval_loss": 0.5498819351196289,
449
+ "eval_runtime": 20.1376,
450
+ "eval_samples_per_second": 7.449,
451
+ "eval_steps_per_second": 0.497,
452
+ "step": 500
453
+ },
454
+ {
455
+ "epoch": 16.32,
456
+ "learning_rate": 3.727598566308244e-05,
457
+ "loss": 0.2294,
458
+ "step": 510
459
+ },
460
+ {
461
+ "epoch": 16.64,
462
+ "learning_rate": 3.691756272401434e-05,
463
+ "loss": 0.228,
464
+ "step": 520
465
+ },
466
+ {
467
+ "epoch": 16.96,
468
+ "learning_rate": 3.655913978494624e-05,
469
+ "loss": 0.2334,
470
+ "step": 530
471
+ },
472
+ {
473
+ "epoch": 16.99,
474
+ "eval_accuracy": 0.8133333333333334,
475
+ "eval_loss": 0.5679303407669067,
476
+ "eval_runtime": 19.8409,
477
+ "eval_samples_per_second": 7.56,
478
+ "eval_steps_per_second": 0.504,
479
+ "step": 531
480
+ },
481
+ {
482
+ "epoch": 17.28,
483
+ "learning_rate": 3.6200716845878134e-05,
484
+ "loss": 0.1944,
485
+ "step": 540
486
+ },
487
+ {
488
+ "epoch": 17.6,
489
+ "learning_rate": 3.5842293906810036e-05,
490
+ "loss": 0.1782,
491
+ "step": 550
492
+ },
493
+ {
494
+ "epoch": 17.92,
495
+ "learning_rate": 3.548387096774194e-05,
496
+ "loss": 0.1882,
497
+ "step": 560
498
+ },
499
+ {
500
+ "epoch": 17.98,
501
+ "eval_accuracy": 0.82,
502
+ "eval_loss": 0.5990158319473267,
503
+ "eval_runtime": 19.6405,
504
+ "eval_samples_per_second": 7.637,
505
+ "eval_steps_per_second": 0.509,
506
+ "step": 562
507
+ },
508
+ {
509
+ "epoch": 18.24,
510
+ "learning_rate": 3.512544802867383e-05,
511
+ "loss": 0.1918,
512
+ "step": 570
513
+ },
514
+ {
515
+ "epoch": 18.56,
516
+ "learning_rate": 3.4767025089605734e-05,
517
+ "loss": 0.208,
518
+ "step": 580
519
+ },
520
+ {
521
+ "epoch": 18.88,
522
+ "learning_rate": 3.4408602150537636e-05,
523
+ "loss": 0.2121,
524
+ "step": 590
525
+ },
526
+ {
527
+ "epoch": 18.98,
528
+ "eval_accuracy": 0.8333333333333334,
529
+ "eval_loss": 0.4990116357803345,
530
+ "eval_runtime": 20.1236,
531
+ "eval_samples_per_second": 7.454,
532
+ "eval_steps_per_second": 0.497,
533
+ "step": 593
534
+ },
535
+ {
536
+ "epoch": 19.2,
537
+ "learning_rate": 3.405017921146954e-05,
538
+ "loss": 0.1591,
539
+ "step": 600
540
+ },
541
+ {
542
+ "epoch": 19.52,
543
+ "learning_rate": 3.369175627240144e-05,
544
+ "loss": 0.1472,
545
+ "step": 610
546
+ },
547
+ {
548
+ "epoch": 19.84,
549
+ "learning_rate": 3.3333333333333335e-05,
550
+ "loss": 0.1456,
551
+ "step": 620
552
+ },
553
+ {
554
+ "epoch": 20.0,
555
+ "eval_accuracy": 0.8066666666666666,
556
+ "eval_loss": 0.7644553184509277,
557
+ "eval_runtime": 19.8391,
558
+ "eval_samples_per_second": 7.561,
559
+ "eval_steps_per_second": 0.504,
560
+ "step": 625
561
+ },
562
+ {
563
+ "epoch": 20.16,
564
+ "learning_rate": 3.297491039426524e-05,
565
+ "loss": 0.1569,
566
+ "step": 630
567
+ },
568
+ {
569
+ "epoch": 20.48,
570
+ "learning_rate": 3.261648745519714e-05,
571
+ "loss": 0.1653,
572
+ "step": 640
573
+ },
574
+ {
575
+ "epoch": 20.8,
576
+ "learning_rate": 3.2258064516129034e-05,
577
+ "loss": 0.1938,
578
+ "step": 650
579
+ },
580
+ {
581
+ "epoch": 20.99,
582
+ "eval_accuracy": 0.8266666666666667,
583
+ "eval_loss": 0.579462468624115,
584
+ "eval_runtime": 19.9203,
585
+ "eval_samples_per_second": 7.53,
586
+ "eval_steps_per_second": 0.502,
587
+ "step": 656
588
+ },
589
+ {
590
+ "epoch": 21.12,
591
+ "learning_rate": 3.1899641577060935e-05,
592
+ "loss": 0.1467,
593
+ "step": 660
594
+ },
595
+ {
596
+ "epoch": 21.44,
597
+ "learning_rate": 3.154121863799283e-05,
598
+ "loss": 0.1463,
599
+ "step": 670
600
+ },
601
+ {
602
+ "epoch": 21.76,
603
+ "learning_rate": 3.118279569892473e-05,
604
+ "loss": 0.1489,
605
+ "step": 680
606
+ },
607
+ {
608
+ "epoch": 21.98,
609
+ "eval_accuracy": 0.8333333333333334,
610
+ "eval_loss": 0.4865401089191437,
611
+ "eval_runtime": 20.1519,
612
+ "eval_samples_per_second": 7.443,
613
+ "eval_steps_per_second": 0.496,
614
+ "step": 687
615
+ },
616
+ {
617
+ "epoch": 22.08,
618
+ "learning_rate": 3.0824372759856634e-05,
619
+ "loss": 0.1771,
620
+ "step": 690
621
+ },
622
+ {
623
+ "epoch": 22.4,
624
+ "learning_rate": 3.046594982078853e-05,
625
+ "loss": 0.1178,
626
+ "step": 700
627
+ },
628
+ {
629
+ "epoch": 22.72,
630
+ "learning_rate": 3.010752688172043e-05,
631
+ "loss": 0.1887,
632
+ "step": 710
633
+ },
634
+ {
635
+ "epoch": 22.98,
636
+ "eval_accuracy": 0.8,
637
+ "eval_loss": 0.641372561454773,
638
+ "eval_runtime": 20.0159,
639
+ "eval_samples_per_second": 7.494,
640
+ "eval_steps_per_second": 0.5,
641
+ "step": 718
642
+ },
643
+ {
644
+ "epoch": 23.04,
645
+ "learning_rate": 2.974910394265233e-05,
646
+ "loss": 0.1736,
647
+ "step": 720
648
+ },
649
+ {
650
+ "epoch": 23.36,
651
+ "learning_rate": 2.939068100358423e-05,
652
+ "loss": 0.1307,
653
+ "step": 730
654
+ },
655
+ {
656
+ "epoch": 23.68,
657
+ "learning_rate": 2.9032258064516133e-05,
658
+ "loss": 0.1716,
659
+ "step": 740
660
+ },
661
+ {
662
+ "epoch": 24.0,
663
+ "learning_rate": 2.8673835125448028e-05,
664
+ "loss": 0.1713,
665
+ "step": 750
666
+ },
667
+ {
668
+ "epoch": 24.0,
669
+ "eval_accuracy": 0.84,
670
+ "eval_loss": 0.5748192071914673,
671
+ "eval_runtime": 20.2017,
672
+ "eval_samples_per_second": 7.425,
673
+ "eval_steps_per_second": 0.495,
674
+ "step": 750
675
+ },
676
+ {
677
+ "epoch": 24.32,
678
+ "learning_rate": 2.831541218637993e-05,
679
+ "loss": 0.1503,
680
+ "step": 760
681
+ },
682
+ {
683
+ "epoch": 24.64,
684
+ "learning_rate": 2.7956989247311828e-05,
685
+ "loss": 0.1278,
686
+ "step": 770
687
+ },
688
+ {
689
+ "epoch": 24.96,
690
+ "learning_rate": 2.759856630824373e-05,
691
+ "loss": 0.154,
692
+ "step": 780
693
+ },
694
+ {
695
+ "epoch": 24.99,
696
+ "eval_accuracy": 0.8266666666666667,
697
+ "eval_loss": 0.5349760055541992,
698
+ "eval_runtime": 20.1924,
699
+ "eval_samples_per_second": 7.429,
700
+ "eval_steps_per_second": 0.495,
701
+ "step": 781
702
+ },
703
+ {
704
+ "epoch": 25.28,
705
+ "learning_rate": 2.7240143369175632e-05,
706
+ "loss": 0.0884,
707
+ "step": 790
708
+ },
709
+ {
710
+ "epoch": 25.6,
711
+ "learning_rate": 2.6881720430107527e-05,
712
+ "loss": 0.134,
713
+ "step": 800
714
+ },
715
+ {
716
+ "epoch": 25.92,
717
+ "learning_rate": 2.652329749103943e-05,
718
+ "loss": 0.1232,
719
+ "step": 810
720
+ },
721
+ {
722
+ "epoch": 25.98,
723
+ "eval_accuracy": 0.8466666666666667,
724
+ "eval_loss": 0.5574176907539368,
725
+ "eval_runtime": 20.0038,
726
+ "eval_samples_per_second": 7.499,
727
+ "eval_steps_per_second": 0.5,
728
+ "step": 812
729
+ },
730
+ {
731
+ "epoch": 26.24,
732
+ "learning_rate": 2.616487455197133e-05,
733
+ "loss": 0.1021,
734
+ "step": 820
735
+ },
736
+ {
737
+ "epoch": 26.56,
738
+ "learning_rate": 2.5806451612903226e-05,
739
+ "loss": 0.1958,
740
+ "step": 830
741
+ },
742
+ {
743
+ "epoch": 26.88,
744
+ "learning_rate": 2.5448028673835127e-05,
745
+ "loss": 0.1514,
746
+ "step": 840
747
+ },
748
+ {
749
+ "epoch": 26.98,
750
+ "eval_accuracy": 0.8666666666666667,
751
+ "eval_loss": 0.4726710021495819,
752
+ "eval_runtime": 20.1425,
753
+ "eval_samples_per_second": 7.447,
754
+ "eval_steps_per_second": 0.496,
755
+ "step": 843
756
+ },
757
+ {
758
+ "epoch": 27.2,
759
+ "learning_rate": 2.5089605734767026e-05,
760
+ "loss": 0.1184,
761
+ "step": 850
762
+ },
763
+ {
764
+ "epoch": 27.52,
765
+ "learning_rate": 2.4731182795698928e-05,
766
+ "loss": 0.1252,
767
+ "step": 860
768
+ },
769
+ {
770
+ "epoch": 27.84,
771
+ "learning_rate": 2.4372759856630826e-05,
772
+ "loss": 0.0835,
773
+ "step": 870
774
+ },
775
+ {
776
+ "epoch": 28.0,
777
+ "eval_accuracy": 0.8133333333333334,
778
+ "eval_loss": 0.8025414943695068,
779
+ "eval_runtime": 19.9723,
780
+ "eval_samples_per_second": 7.51,
781
+ "eval_steps_per_second": 0.501,
782
+ "step": 875
783
+ },
784
+ {
785
+ "epoch": 28.16,
786
+ "learning_rate": 2.4014336917562724e-05,
787
+ "loss": 0.1916,
788
+ "step": 880
789
+ },
790
+ {
791
+ "epoch": 28.48,
792
+ "learning_rate": 2.3655913978494626e-05,
793
+ "loss": 0.1547,
794
+ "step": 890
795
+ },
796
+ {
797
+ "epoch": 28.8,
798
+ "learning_rate": 2.3297491039426525e-05,
799
+ "loss": 0.1012,
800
+ "step": 900
801
+ },
802
+ {
803
+ "epoch": 28.99,
804
+ "eval_accuracy": 0.8666666666666667,
805
+ "eval_loss": 0.5923764705657959,
806
+ "eval_runtime": 19.9464,
807
+ "eval_samples_per_second": 7.52,
808
+ "eval_steps_per_second": 0.501,
809
+ "step": 906
810
+ },
811
+ {
812
+ "epoch": 29.12,
813
+ "learning_rate": 2.2939068100358423e-05,
814
+ "loss": 0.0884,
815
+ "step": 910
816
+ },
817
+ {
818
+ "epoch": 29.44,
819
+ "learning_rate": 2.258064516129032e-05,
820
+ "loss": 0.0869,
821
+ "step": 920
822
+ },
823
+ {
824
+ "epoch": 29.76,
825
+ "learning_rate": 2.2222222222222223e-05,
826
+ "loss": 0.1328,
827
+ "step": 930
828
+ },
829
+ {
830
+ "epoch": 29.98,
831
+ "eval_accuracy": 0.86,
832
+ "eval_loss": 0.5381821990013123,
833
+ "eval_runtime": 20.537,
834
+ "eval_samples_per_second": 7.304,
835
+ "eval_steps_per_second": 0.487,
836
+ "step": 937
837
+ },
838
+ {
839
+ "epoch": 30.08,
840
+ "learning_rate": 2.1863799283154125e-05,
841
+ "loss": 0.1074,
842
+ "step": 940
843
+ },
844
+ {
845
+ "epoch": 30.4,
846
+ "learning_rate": 2.1505376344086024e-05,
847
+ "loss": 0.1311,
848
+ "step": 950
849
+ },
850
+ {
851
+ "epoch": 30.72,
852
+ "learning_rate": 2.1146953405017922e-05,
853
+ "loss": 0.1243,
854
+ "step": 960
855
+ },
856
+ {
857
+ "epoch": 30.98,
858
+ "eval_accuracy": 0.8466666666666667,
859
+ "eval_loss": 0.5992811322212219,
860
+ "eval_runtime": 19.7967,
861
+ "eval_samples_per_second": 7.577,
862
+ "eval_steps_per_second": 0.505,
863
+ "step": 968
864
+ },
865
+ {
866
+ "epoch": 31.04,
867
+ "learning_rate": 2.078853046594982e-05,
868
+ "loss": 0.122,
869
+ "step": 970
870
+ },
871
+ {
872
+ "epoch": 31.36,
873
+ "learning_rate": 2.0430107526881722e-05,
874
+ "loss": 0.0976,
875
+ "step": 980
876
+ },
877
+ {
878
+ "epoch": 31.68,
879
+ "learning_rate": 2.007168458781362e-05,
880
+ "loss": 0.114,
881
+ "step": 990
882
+ },
883
+ {
884
+ "epoch": 32.0,
885
+ "learning_rate": 1.9713261648745522e-05,
886
+ "loss": 0.0924,
887
+ "step": 1000
888
+ },
889
+ {
890
+ "epoch": 32.0,
891
+ "eval_accuracy": 0.8133333333333334,
892
+ "eval_loss": 0.7414191961288452,
893
+ "eval_runtime": 20.4518,
894
+ "eval_samples_per_second": 7.334,
895
+ "eval_steps_per_second": 0.489,
896
+ "step": 1000
897
+ },
898
+ {
899
+ "epoch": 32.32,
900
+ "learning_rate": 1.935483870967742e-05,
901
+ "loss": 0.117,
902
+ "step": 1010
903
+ },
904
+ {
905
+ "epoch": 32.64,
906
+ "learning_rate": 1.899641577060932e-05,
907
+ "loss": 0.1152,
908
+ "step": 1020
909
+ },
910
+ {
911
+ "epoch": 32.96,
912
+ "learning_rate": 1.863799283154122e-05,
913
+ "loss": 0.1101,
914
+ "step": 1030
915
+ },
916
+ {
917
+ "epoch": 32.99,
918
+ "eval_accuracy": 0.8333333333333334,
919
+ "eval_loss": 0.7131091952323914,
920
+ "eval_runtime": 20.5438,
921
+ "eval_samples_per_second": 7.301,
922
+ "eval_steps_per_second": 0.487,
923
+ "step": 1031
924
+ },
925
+ {
926
+ "epoch": 33.28,
927
+ "learning_rate": 1.827956989247312e-05,
928
+ "loss": 0.0776,
929
+ "step": 1040
930
+ },
931
+ {
932
+ "epoch": 33.6,
933
+ "learning_rate": 1.7921146953405018e-05,
934
+ "loss": 0.0753,
935
+ "step": 1050
936
+ },
937
+ {
938
+ "epoch": 33.92,
939
+ "learning_rate": 1.7562724014336916e-05,
940
+ "loss": 0.0785,
941
+ "step": 1060
942
+ },
943
+ {
944
+ "epoch": 33.98,
945
+ "eval_accuracy": 0.8733333333333333,
946
+ "eval_loss": 0.5796133279800415,
947
+ "eval_runtime": 19.6695,
948
+ "eval_samples_per_second": 7.626,
949
+ "eval_steps_per_second": 0.508,
950
+ "step": 1062
951
+ }
952
+ ],
953
+ "max_steps": 1550,
954
+ "num_train_epochs": 50,
955
+ "total_flos": 5.774646411362304e+18,
956
+ "trial_name": null,
957
+ "trial_params": null
958
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f25ef23bf58a2c6bcb4b87345ed869b5952faad8168d73b0c5bbaeafee2f66d
3
+ size 4027