KakashiH committed
Commit 399af4c (verified)
1 parent: 237c3af

Upload 8 files

config.json ADDED
@@ -0,0 +1,78 @@
+ {
+   "_name_or_path": "google/vit-base-patch16-224",
+   "architectures": [
+     "ViTForImageClassification"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "encoder_stride": 16,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "Adialer.C",
+     "1": "Allaple.L",
+     "2": "C2LOP.gen!g",
+     "3": "Dontovo.A",
+     "4": "Lolyda.AA1",
+     "5": "Lolyda.AT",
+     "6": "'Rbot!gen'",
+     "7": "Swizzor.gen!I",
+     "8": "Yuner.A",
+     "9": "Agent.FYI",
+     "10": "'Alueron.gen!J'",
+     "11": "C2LOP.P",
+     "12": "Fakerean",
+     "13": "Lolyda.AA2",
+     "14": "'Malex.gen!J'",
+     "15": "Skintrim.N",
+     "16": "VB.AT",
+     "17": "Allaple.A",
+     "18": "Autorun.K",
+     "19": "Dialplatform.B",
+     "20": "Instantaccess",
+     "21": "Lolyda.AA3",
+     "22": "Obfuscator.AD",
+     "23": "Swizzor.gen!E",
+     "24": "Wintrim.BX"
+   },
+   "image_size": 224,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "'Alueron.gen!J'": 10,
+     "'Malex.gen!J'": 14,
+     "'Rbot!gen'": 6,
+     "Adialer.C": 0,
+     "Agent.FYI": 9,
+     "Allaple.A": 17,
+     "Allaple.L": 1,
+     "Autorun.K": 18,
+     "C2LOP.P": 11,
+     "C2LOP.gen!g": 2,
+     "Dialplatform.B": 19,
+     "Dontovo.A": 3,
+     "Fakerean": 12,
+     "Instantaccess": 20,
+     "Lolyda.AA1": 4,
+     "Lolyda.AA2": 13,
+     "Lolyda.AA3": 21,
+     "Lolyda.AT": 5,
+     "Obfuscator.AD": 22,
+     "Skintrim.N": 15,
+     "Swizzor.gen!E": 23,
+     "Swizzor.gen!I": 7,
+     "VB.AT": 16,
+     "Wintrim.BX": 24,
+     "Yuner.A": 8
+   },
+   "layer_norm_eps": 1e-12,
+   "model_type": "vit",
+   "num_attention_heads": 12,
+   "num_channels": 3,
+   "num_hidden_layers": 12,
+   "patch_size": 16,
+   "problem_type": "single_label_classification",
+   "qkv_bias": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.48.0"
+ }
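
The config above describes a 25-class ViT-Base classifier (16×16 patches, 224×224 input) fine-tuned from google/vit-base-patch16-224 on the MalImg malware families listed in id2label. A minimal loading sketch using the standard transformers auto classes follows; the checkpoint path is a placeholder, since the commit does not state the repository id.

# Minimal sketch: load the fine-tuned checkpoint described by config.json.
# "path/to/vit_finetuned" is a placeholder, not a path taken from this commit.
from transformers import AutoImageProcessor, AutoModelForImageClassification

checkpoint = "path/to/vit_finetuned"  # placeholder: local dir or Hub repo id
processor = AutoImageProcessor.from_pretrained(checkpoint)
model = AutoModelForImageClassification.from_pretrained(checkpoint)

print(model.config.num_labels)    # 25 MalImg families
print(model.config.id2label[0])   # "Adialer.C"
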
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:524f8ca988c1671327ed7efe4a96401068a9ab693f7cfd468ddd7543dc572d19
+ size 343294724
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f5cc6adaa14b12a4fd054fed0861cd9179d6f074f5522d54925efe43e28550a
+ size 686710330
preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "do_convert_rgb": null,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "image_processor_type": "ViTImageProcessor",
+   "image_std": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 224,
+     "width": 224
+   }
+ }
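
This preprocessor config is the standard ViT pipeline: resize to 224×224 (resample = 2, i.e. bilinear), rescale pixel values by 1/255 (0.00392…), then normalize each channel with mean 0.5 and std 0.5. A short sketch of the equivalent ViTImageProcessor call; the image file name is a placeholder.

# Sketch of the preprocessing defined above; "sample.png" is a placeholder file name.
from PIL import Image
from transformers import ViTImageProcessor

processor = ViTImageProcessor(
    do_resize=True,
    size={"height": 224, "width": 224},
    resample=2,                      # PIL bilinear
    do_rescale=True,
    rescale_factor=1 / 255,
    do_normalize=True,
    image_mean=[0.5, 0.5, 0.5],
    image_std=[0.5, 0.5, 0.5],
)

image = Image.open("sample.png").convert("RGB")    # malware bytes rendered as an image
inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)                # torch.Size([1, 3, 224, 224])
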
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2c06bc6a0ac1b46f081e30b98c31da39317e125d79cd0e871738127bcf2399f
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,603 @@
+ {
+   "best_metric": 0.9916765755053508,
+   "best_model_checkpoint": "/home/user/Desktop/ViT/MalImg/vit_finetuned/checkpoint-2271",
+   "epoch": 5.0,
+   "eval_steps": 500,
+   "global_step": 3785,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.06605019815059446,
+       "grad_norm": 5.9336066246032715,
+       "learning_rate": 1.9735799207397622e-05,
+       "loss": 1.3733,
+       "step": 50
+     },
+     {
+       "epoch": 0.13210039630118892,
+       "grad_norm": 4.252811908721924,
+       "learning_rate": 1.9471598414795246e-05,
+       "loss": 0.4773,
+       "step": 100
+     },
+     {
+       "epoch": 0.19815059445178335,
+       "grad_norm": 1.3301128149032593,
+       "learning_rate": 1.9207397622192866e-05,
+       "loss": 0.2271,
+       "step": 150
+     },
+     {
+       "epoch": 0.26420079260237783,
+       "grad_norm": 0.1664501279592514,
+       "learning_rate": 1.894319682959049e-05,
+       "loss": 0.133,
+       "step": 200
+     },
+     {
+       "epoch": 0.33025099075297226,
+       "grad_norm": 13.456730842590332,
+       "learning_rate": 1.8678996036988114e-05,
+       "loss": 0.1293,
+       "step": 250
+     },
+     {
+       "epoch": 0.3963011889035667,
+       "grad_norm": 0.6937832832336426,
+       "learning_rate": 1.8414795244385734e-05,
+       "loss": 0.0888,
+       "step": 300
+     },
+     {
+       "epoch": 0.4623513870541612,
+       "grad_norm": 0.14598596096038818,
+       "learning_rate": 1.8150594451783358e-05,
+       "loss": 0.0692,
+       "step": 350
+     },
+     {
+       "epoch": 0.5284015852047557,
+       "grad_norm": 0.15269458293914795,
+       "learning_rate": 1.7886393659180978e-05,
+       "loss": 0.0578,
+       "step": 400
+     },
+     {
+       "epoch": 0.5944517833553501,
+       "grad_norm": 0.07667677849531174,
+       "learning_rate": 1.76221928665786e-05,
+       "loss": 0.0573,
+       "step": 450
+     },
+     {
+       "epoch": 0.6605019815059445,
+       "grad_norm": 0.3432880640029907,
+       "learning_rate": 1.7357992073976226e-05,
+       "loss": 0.0464,
+       "step": 500
+     },
+     {
+       "epoch": 0.726552179656539,
+       "grad_norm": 6.246374130249023,
+       "learning_rate": 1.7093791281373846e-05,
+       "loss": 0.0523,
+       "step": 550
+     },
+     {
+       "epoch": 0.7926023778071334,
+       "grad_norm": 0.09200052171945572,
+       "learning_rate": 1.6829590488771467e-05,
+       "loss": 0.071,
+       "step": 600
+     },
+     {
+       "epoch": 0.8586525759577279,
+       "grad_norm": 0.8642477989196777,
+       "learning_rate": 1.656538969616909e-05,
+       "loss": 0.0382,
+       "step": 650
+     },
+     {
+       "epoch": 0.9247027741083224,
+       "grad_norm": 0.06948922574520111,
+       "learning_rate": 1.630118890356671e-05,
+       "loss": 0.0275,
+       "step": 700
+     },
+     {
+       "epoch": 0.9907529722589168,
+       "grad_norm": 0.15016400814056396,
+       "learning_rate": 1.6036988110964335e-05,
+       "loss": 0.0471,
+       "step": 750
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.9797859690844233,
+       "eval_loss": 0.052647557109594345,
+       "eval_runtime": 49.7302,
+       "eval_samples_per_second": 16.911,
+       "eval_steps_per_second": 4.243,
+       "step": 757
+     },
+     {
+       "epoch": 1.0568031704095113,
+       "grad_norm": 0.7207925319671631,
+       "learning_rate": 1.5772787318361958e-05,
+       "loss": 0.0358,
+       "step": 800
+     },
+     {
+       "epoch": 1.1228533685601056,
+       "grad_norm": 0.022695371881127357,
+       "learning_rate": 1.550858652575958e-05,
+       "loss": 0.0142,
+       "step": 850
+     },
+     {
+       "epoch": 1.1889035667107002,
+       "grad_norm": 0.01373555138707161,
+       "learning_rate": 1.52443857331572e-05,
+       "loss": 0.0678,
+       "step": 900
+     },
+     {
+       "epoch": 1.2549537648612945,
+       "grad_norm": 0.02284625917673111,
+       "learning_rate": 1.4980184940554823e-05,
+       "loss": 0.0174,
+       "step": 950
+     },
+     {
+       "epoch": 1.321003963011889,
+       "grad_norm": 0.03625660389661789,
+       "learning_rate": 1.4715984147952445e-05,
+       "loss": 0.0198,
+       "step": 1000
+     },
+     {
+       "epoch": 1.3870541611624834,
+       "grad_norm": 8.791983604431152,
+       "learning_rate": 1.4451783355350067e-05,
+       "loss": 0.0181,
+       "step": 1050
+     },
+     {
+       "epoch": 1.453104359313078,
+       "grad_norm": 10.266106605529785,
+       "learning_rate": 1.418758256274769e-05,
+       "loss": 0.0166,
+       "step": 1100
+     },
+     {
+       "epoch": 1.5191545574636725,
+       "grad_norm": 0.08618652075529099,
+       "learning_rate": 1.3923381770145313e-05,
+       "loss": 0.0216,
+       "step": 1150
+     },
+     {
+       "epoch": 1.5852047556142668,
+       "grad_norm": 0.020607857033610344,
+       "learning_rate": 1.3659180977542933e-05,
+       "loss": 0.0166,
+       "step": 1200
+     },
+     {
+       "epoch": 1.6512549537648613,
+       "grad_norm": 0.008363209664821625,
+       "learning_rate": 1.3394980184940555e-05,
+       "loss": 0.011,
+       "step": 1250
+     },
+     {
+       "epoch": 1.7173051519154559,
+       "grad_norm": 8.230094909667969,
+       "learning_rate": 1.3130779392338177e-05,
+       "loss": 0.0391,
+       "step": 1300
+     },
+     {
+       "epoch": 1.7833553500660502,
+       "grad_norm": 0.2879900634288788,
+       "learning_rate": 1.28665785997358e-05,
+       "loss": 0.0281,
+       "step": 1350
+     },
+     {
+       "epoch": 1.8494055482166445,
+       "grad_norm": 0.6117168068885803,
+       "learning_rate": 1.2602377807133423e-05,
+       "loss": 0.0275,
+       "step": 1400
+     },
+     {
+       "epoch": 1.9154557463672393,
+       "grad_norm": 0.04467739537358284,
+       "learning_rate": 1.2338177014531045e-05,
+       "loss": 0.0301,
+       "step": 1450
+     },
+     {
+       "epoch": 1.9815059445178336,
+       "grad_norm": 0.017908189445734024,
+       "learning_rate": 1.2073976221928667e-05,
+       "loss": 0.0178,
+       "step": 1500
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.9881093935790726,
+       "eval_loss": 0.03636582940816879,
+       "eval_runtime": 47.4743,
+       "eval_samples_per_second": 17.715,
+       "eval_steps_per_second": 4.445,
+       "step": 1514
+     },
+     {
+       "epoch": 2.047556142668428,
+       "grad_norm": 0.036849573254585266,
+       "learning_rate": 1.180977542932629e-05,
+       "loss": 0.0187,
+       "step": 1550
+     },
+     {
+       "epoch": 2.1136063408190227,
+       "grad_norm": 0.04460464045405388,
+       "learning_rate": 1.1545574636723912e-05,
+       "loss": 0.0067,
+       "step": 1600
+     },
+     {
+       "epoch": 2.179656538969617,
+       "grad_norm": 0.009667308069765568,
+       "learning_rate": 1.1281373844121532e-05,
+       "loss": 0.0238,
+       "step": 1650
+     },
+     {
+       "epoch": 2.2457067371202113,
+       "grad_norm": 0.03686352074146271,
+       "learning_rate": 1.1017173051519154e-05,
+       "loss": 0.0022,
+       "step": 1700
+     },
+     {
+       "epoch": 2.3117569352708056,
+       "grad_norm": 0.031481340527534485,
+       "learning_rate": 1.0752972258916778e-05,
+       "loss": 0.0026,
+       "step": 1750
+     },
+     {
+       "epoch": 2.3778071334214004,
+       "grad_norm": 0.011794793419539928,
+       "learning_rate": 1.04887714663144e-05,
+       "loss": 0.0116,
+       "step": 1800
+     },
+     {
+       "epoch": 2.4438573315719947,
+       "grad_norm": 0.010463342070579529,
+       "learning_rate": 1.0224570673712022e-05,
+       "loss": 0.001,
+       "step": 1850
+     },
+     {
+       "epoch": 2.509907529722589,
+       "grad_norm": 0.015383273363113403,
+       "learning_rate": 9.960369881109644e-06,
+       "loss": 0.0228,
+       "step": 1900
+     },
+     {
+       "epoch": 2.5759577278731838,
+       "grad_norm": 0.013397900387644768,
+       "learning_rate": 9.696169088507266e-06,
+       "loss": 0.0089,
+       "step": 1950
+     },
+     {
+       "epoch": 2.642007926023778,
+       "grad_norm": 0.00911177322268486,
+       "learning_rate": 9.431968295904888e-06,
+       "loss": 0.02,
+       "step": 2000
+     },
+     {
+       "epoch": 2.7080581241743724,
+       "grad_norm": 0.02440851554274559,
+       "learning_rate": 9.16776750330251e-06,
+       "loss": 0.0013,
+       "step": 2050
+     },
+     {
+       "epoch": 2.7741083223249667,
+       "grad_norm": 0.009484563954174519,
+       "learning_rate": 8.903566710700134e-06,
+       "loss": 0.0194,
+       "step": 2100
+     },
+     {
+       "epoch": 2.8401585204755615,
+       "grad_norm": 0.006213477812707424,
+       "learning_rate": 8.639365918097754e-06,
+       "loss": 0.0029,
+       "step": 2150
+     },
+     {
+       "epoch": 2.906208718626156,
+       "grad_norm": 0.0294838547706604,
+       "learning_rate": 8.375165125495377e-06,
+       "loss": 0.0155,
+       "step": 2200
+     },
+     {
+       "epoch": 2.9722589167767506,
+       "grad_norm": 0.004683547653257847,
+       "learning_rate": 8.110964332893e-06,
+       "loss": 0.0012,
+       "step": 2250
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.9916765755053508,
+       "eval_loss": 0.03940876945853233,
+       "eval_runtime": 46.7804,
+       "eval_samples_per_second": 17.978,
+       "eval_steps_per_second": 4.51,
+       "step": 2271
+     },
+     {
+       "epoch": 3.038309114927345,
+       "grad_norm": 0.012384450994431973,
+       "learning_rate": 7.846763540290622e-06,
+       "loss": 0.0023,
+       "step": 2300
+     },
+     {
+       "epoch": 3.104359313077939,
+       "grad_norm": 0.01143250335007906,
+       "learning_rate": 7.582562747688244e-06,
+       "loss": 0.0016,
+       "step": 2350
+     },
+     {
+       "epoch": 3.1704095112285335,
+       "grad_norm": 0.011226821690797806,
+       "learning_rate": 7.318361955085866e-06,
+       "loss": 0.0015,
+       "step": 2400
+     },
+     {
+       "epoch": 3.2364597093791283,
+       "grad_norm": 0.1874089390039444,
+       "learning_rate": 7.054161162483489e-06,
+       "loss": 0.0048,
+       "step": 2450
+     },
+     {
+       "epoch": 3.3025099075297226,
+       "grad_norm": 0.005090535152703524,
+       "learning_rate": 6.78996036988111e-06,
+       "loss": 0.0006,
+       "step": 2500
+     },
+     {
+       "epoch": 3.368560105680317,
+       "grad_norm": 5.946714401245117,
+       "learning_rate": 6.525759577278732e-06,
+       "loss": 0.0039,
+       "step": 2550
+     },
+     {
+       "epoch": 3.4346103038309117,
+       "grad_norm": 0.015056404285132885,
+       "learning_rate": 6.261558784676355e-06,
+       "loss": 0.0039,
+       "step": 2600
+     },
+     {
+       "epoch": 3.500660501981506,
+       "grad_norm": 0.09337496757507324,
+       "learning_rate": 5.997357992073977e-06,
+       "loss": 0.0008,
+       "step": 2650
+     },
+     {
+       "epoch": 3.5667107001321003,
+       "grad_norm": 0.02832830883562565,
+       "learning_rate": 5.733157199471598e-06,
+       "loss": 0.0017,
+       "step": 2700
+     },
+     {
+       "epoch": 3.6327608982826947,
+       "grad_norm": 0.003939814865589142,
+       "learning_rate": 5.468956406869221e-06,
+       "loss": 0.0006,
+       "step": 2750
+     },
+     {
+       "epoch": 3.6988110964332894,
+       "grad_norm": 0.009257273748517036,
+       "learning_rate": 5.204755614266843e-06,
+       "loss": 0.01,
+       "step": 2800
+     },
+     {
+       "epoch": 3.7648612945838837,
+       "grad_norm": 0.03735257685184479,
+       "learning_rate": 4.940554821664465e-06,
+       "loss": 0.0007,
+       "step": 2850
+     },
+     {
+       "epoch": 3.830911492734478,
+       "grad_norm": 0.013488363474607468,
+       "learning_rate": 4.676354029062087e-06,
+       "loss": 0.0064,
+       "step": 2900
+     },
+     {
+       "epoch": 3.896961690885073,
+       "grad_norm": 0.03222118690609932,
+       "learning_rate": 4.412153236459709e-06,
+       "loss": 0.0024,
+       "step": 2950
+     },
+     {
+       "epoch": 3.963011889035667,
+       "grad_norm": 0.007759902160614729,
+       "learning_rate": 4.147952443857332e-06,
+       "loss": 0.0007,
+       "step": 3000
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.9916765755053508,
+       "eval_loss": 0.030070677399635315,
+       "eval_runtime": 47.0985,
+       "eval_samples_per_second": 17.856,
+       "eval_steps_per_second": 4.48,
+       "step": 3028
+     },
+     {
+       "epoch": 4.0290620871862615,
+       "grad_norm": 0.005042864475399256,
+       "learning_rate": 3.8837516512549536e-06,
+       "loss": 0.0004,
+       "step": 3050
+     },
+     {
+       "epoch": 4.095112285336856,
+       "grad_norm": 0.020858343690633774,
+       "learning_rate": 3.619550858652576e-06,
+       "loss": 0.0004,
+       "step": 3100
+     },
+     {
+       "epoch": 4.16116248348745,
+       "grad_norm": 0.006172764115035534,
+       "learning_rate": 3.3553500660501986e-06,
+       "loss": 0.0007,
+       "step": 3150
+     },
+     {
+       "epoch": 4.227212681638045,
+       "grad_norm": 0.0033390983007848263,
+       "learning_rate": 3.0911492734478207e-06,
+       "loss": 0.0009,
+       "step": 3200
+     },
+     {
+       "epoch": 4.29326287978864,
+       "grad_norm": 0.015372613444924355,
+       "learning_rate": 2.8269484808454427e-06,
+       "loss": 0.0006,
+       "step": 3250
+     },
+     {
+       "epoch": 4.359313077939234,
+       "grad_norm": 0.006469820160418749,
+       "learning_rate": 2.5627476882430652e-06,
+       "loss": 0.0004,
+       "step": 3300
+     },
+     {
+       "epoch": 4.425363276089828,
+       "grad_norm": 0.011778703890740871,
+       "learning_rate": 2.298546895640687e-06,
+       "loss": 0.0004,
+       "step": 3350
+     },
+     {
+       "epoch": 4.491413474240423,
+       "grad_norm": 0.006380158942192793,
+       "learning_rate": 2.0343461030383094e-06,
+       "loss": 0.0004,
+       "step": 3400
+     },
+     {
+       "epoch": 4.557463672391017,
+       "grad_norm": 0.0013477399479597807,
+       "learning_rate": 1.7701453104359315e-06,
+       "loss": 0.0004,
+       "step": 3450
+     },
+     {
+       "epoch": 4.623513870541611,
+       "grad_norm": 0.006167972926050425,
+       "learning_rate": 1.5059445178335536e-06,
+       "loss": 0.0042,
+       "step": 3500
+     },
+     {
+       "epoch": 4.689564068692206,
+       "grad_norm": 0.013963188044726849,
+       "learning_rate": 1.2417437252311758e-06,
+       "loss": 0.0008,
+       "step": 3550
+     },
+     {
+       "epoch": 4.755614266842801,
+       "grad_norm": 0.027660081163048744,
+       "learning_rate": 9.77542932628798e-07,
+       "loss": 0.0004,
+       "step": 3600
+     },
+     {
+       "epoch": 4.821664464993395,
+       "grad_norm": 0.004752615932375193,
+       "learning_rate": 7.133421400264201e-07,
+       "loss": 0.0004,
+       "step": 3650
+     },
+     {
+       "epoch": 4.887714663143989,
+       "grad_norm": 0.009864550083875656,
+       "learning_rate": 4.4914134742404235e-07,
+       "loss": 0.0004,
+       "step": 3700
+     },
+     {
+       "epoch": 4.953764861294584,
+       "grad_norm": 0.020459244027733803,
+       "learning_rate": 1.849405548216645e-07,
+       "loss": 0.0003,
+       "step": 3750
+     },
+     {
+       "epoch": 5.0,
+       "eval_accuracy": 0.9892984542211652,
+       "eval_loss": 0.035547275096178055,
+       "eval_runtime": 47.2255,
+       "eval_samples_per_second": 17.808,
+       "eval_steps_per_second": 4.468,
+       "step": 3785
+     }
+   ],
+   "logging_steps": 50,
+   "max_steps": 3785,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 5,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 2.9313511964758426e+18,
+   "train_batch_size": 10,
+   "trial_name": null,
+   "trial_params": null
+ }
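
trainer_state.json records 5 epochs of fine-tuning (3,785 steps, train batch size 10, learning rate decaying linearly from 2e-5). Validation accuracy peaks at 0.9917 at epoch 3 (checkpoint-2271), which is the checkpoint marked as best_model_checkpoint; the final epoch-5 accuracy is 0.9893. A small sketch for pulling the per-epoch eval curve back out of this file:

# Read the eval entries from trainer_state.json (the values printed are the ones logged above).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_accuracy" in entry:
        print(f"epoch {entry['epoch']:.0f}: accuracy={entry['eval_accuracy']:.4f} loss={entry['eval_loss']:.4f}")

print("best:", state["best_metric"], "->", state["best_model_checkpoint"])
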
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77a17b7e6e7694df8a9337e023ad093de2fd7afa924b9903f9a05f4c75ea9aa3
+ size 5304
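
Taken together, optimizer.pt, scheduler.pt, rng_state.pth, trainer_state.json, and training_args.bin are the usual contents of a transformers Trainer checkpoint, so this upload can also serve as a resume point. training_args.bin is a torch-pickled TrainingArguments object; a rough way to inspect it (assuming PyTorch and transformers are installed, and the file has been downloaded locally):

# Inspect the pickled TrainingArguments; the path is a placeholder for the downloaded file.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)    # TrainingArguments
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)
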