Augusto777 committed on
Commit
82309f0
1 Parent(s): 4082931

End of training

Browse files
README.md ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: microsoft/swinv2-tiny-patch4-window8-256
4
+ tags:
5
+ - generated_from_trainer
6
+ datasets:
7
+ - imagefolder
8
+ metrics:
9
+ - accuracy
10
+ model-index:
11
+ - name: swinv2-tiny-patch4-window8-256-Ocular-Toxoplasmosis
12
+ results:
13
+ - task:
14
+ name: Image Classification
15
+ type: image-classification
16
+ dataset:
17
+ name: imagefolder
18
+ type: imagefolder
19
+ config: default
20
+ split: validation
21
+ args: default
22
+ metrics:
23
+ - name: Accuracy
24
+ type: accuracy
25
+ value: 0.08064516129032258
26
+ ---
27
+
28
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
29
+ should probably proofread and complete it, then remove this comment. -->
30
+
31
+ # swinv2-tiny-patch4-window8-256-Ocular-Toxoplasmosis
32
+
33
+ This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
34
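+ It achieves the following results on the evaluation set (these figures match the step-2 checkpoint, which the Trainer recorded as the best checkpoint because accuracy never changed during training; the validation loss logged at the final step, 80, was 1.6348):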
35
+ - Loss: 8.8834
36
+ - Accuracy: 0.0806
37
+
38
+ ## Model description
39
+
40
+ More information needed
41
+
42
+ ## Intended uses & limitations
43
+
44
+ More information needed
45
+
46
+ ## Training and evaluation data
47
+
48
+ More information needed
49
+
50
+ ## Training procedure
51
+
52
+ ### Training hyperparameters
53
+
54
+ The following hyperparameters were used during training:
55
+ - learning_rate: 5e-05
56
+ - train_batch_size: 32
57
+ - eval_batch_size: 32
58
+ - seed: 42
59
+ - gradient_accumulation_steps: 4
60
+ - total_train_batch_size: 128
61
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
+ - lr_scheduler_type: linear
63
+ - lr_scheduler_warmup_ratio: 0.1
64
+ - num_epochs: 40
65
+
66
+ ### Training results
67
+
68
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | No log | 0.73 | 2 | 8.8834 | 0.0806 |
71
+ | No log | 1.82 | 5 | 8.8522 | 0.0806 |
72
+ | No log | 2.91 | 8 | 8.7000 | 0.0806 |
73
+ | 8.7803 | 4.0 | 11 | 8.2692 | 0.0806 |
74
+ | 8.7803 | 4.73 | 13 | 7.8836 | 0.0806 |
75
+ | 8.7803 | 5.82 | 16 | 7.3279 | 0.0806 |
76
+ | 8.7803 | 6.91 | 19 | 6.7700 | 0.0806 |
77
+ | 7.5847 | 8.0 | 22 | 6.1880 | 0.0806 |
78
+ | 7.5847 | 8.73 | 24 | 5.7783 | 0.0806 |
79
+ | 7.5847 | 9.82 | 27 | 5.2113 | 0.0806 |
80
+ | 5.7442 | 10.91 | 30 | 4.7163 | 0.0806 |
81
+ | 5.7442 | 12.0 | 33 | 4.2648 | 0.0806 |
82
+ | 5.7442 | 12.73 | 35 | 3.9892 | 0.0806 |
83
+ | 5.7442 | 13.82 | 38 | 3.6134 | 0.0806 |
84
+ | 4.1747 | 14.91 | 41 | 3.2828 | 0.0806 |
85
+ | 4.1747 | 16.0 | 44 | 2.9957 | 0.0806 |
86
+ | 4.1747 | 16.73 | 46 | 2.8259 | 0.0806 |
87
+ | 4.1747 | 17.82 | 49 | 2.5988 | 0.0806 |
88
+ | 3.0458 | 18.91 | 52 | 2.4004 | 0.0806 |
89
+ | 3.0458 | 20.0 | 55 | 2.2272 | 0.0806 |
90
+ | 3.0458 | 20.73 | 57 | 2.1254 | 0.0806 |
91
+ | 2.3301 | 21.82 | 60 | 1.9937 | 0.0806 |
92
+ | 2.3301 | 22.91 | 63 | 1.8860 | 0.0806 |
93
+ | 2.3301 | 24.0 | 66 | 1.8005 | 0.0806 |
94
+ | 2.3301 | 24.73 | 68 | 1.7551 | 0.0806 |
95
+ | 1.9107 | 25.82 | 71 | 1.7021 | 0.0806 |
96
+ | 1.9107 | 26.91 | 74 | 1.6654 | 0.0806 |
97
+ | 1.9107 | 28.0 | 77 | 1.6434 | 0.0806 |
98
+ | 1.9107 | 28.73 | 79 | 1.6362 | 0.0806 |
99
+ | 1.7061 | 29.09 | 80 | 1.6348 | 0.0806 |
100
+
101
+
102
+ ### Framework versions
103
+
104
+ - Transformers 4.36.2
105
+ - Pytorch 2.1.2+cu118
106
+ - Datasets 2.16.1
107
+ - Tokenizers 0.15.0
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.09,
3
+ "eval_accuracy": 0.08064516129032258,
4
+ "eval_loss": 8.883430480957031,
5
+ "eval_runtime": 2.5622,
6
+ "eval_samples_per_second": 24.198,
7
+ "eval_steps_per_second": 0.781,
8
+ "train_loss": 4.409568953514099,
9
+ "train_runtime": 541.1993,
10
+ "train_samples_per_second": 25.868,
11
+ "train_steps_per_second": 0.148
12
+ }
config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/swinv2-tiny-patch4-window8-256",
3
+ "architectures": [
4
+ "Swinv2ForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "depths": [
8
+ 2,
9
+ 2,
10
+ 6,
11
+ 2
12
+ ],
13
+ "drop_path_rate": 0.1,
14
+ "embed_dim": 96,
15
+ "encoder_stride": 32,
16
+ "hidden_act": "gelu",
17
+ "hidden_dropout_prob": 0.0,
18
+ "hidden_size": 768,
19
+ "id2label": {
20
+ "0": "active",
21
+ "1": "active-inactive",
22
+ "2": "healthy",
23
+ "3": "inactive"
24
+ },
25
+ "image_size": 256,
26
+ "initializer_range": 0.02,
27
+ "label2id": {
28
+ "active": 0,
29
+ "active-inactive": 1,
30
+ "healthy": 2,
31
+ "inactive": 3
32
+ },
33
+ "layer_norm_eps": 1e-05,
34
+ "mlp_ratio": 4.0,
35
+ "model_type": "swinv2",
36
+ "num_channels": 3,
37
+ "num_heads": [
38
+ 3,
39
+ 6,
40
+ 12,
41
+ 24
42
+ ],
43
+ "num_layers": 4,
44
+ "patch_size": 4,
45
+ "path_norm": true,
46
+ "pretrained_window_sizes": [
47
+ 0,
48
+ 0,
49
+ 0,
50
+ 0
51
+ ],
52
+ "problem_type": "single_label_classification",
53
+ "qkv_bias": true,
54
+ "torch_dtype": "float32",
55
+ "transformers_version": "4.36.2",
56
+ "use_absolute_embeddings": false,
57
+ "window_size": 8
58
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.09,
3
+ "eval_accuracy": 0.08064516129032258,
4
+ "eval_loss": 8.883430480957031,
5
+ "eval_runtime": 2.5622,
6
+ "eval_samples_per_second": 24.198,
7
+ "eval_steps_per_second": 0.781
8
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2046d4398c99900e190202d9adf3e1f7972aa1a911326e467b32625d0cee42f5
3
+ size 110356296
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.485,
7
+ 0.456,
8
+ 0.406
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.229,
13
+ 0.224,
14
+ 0.225
15
+ ],
16
+ "resample": 3,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 256,
20
+ "width": 256
21
+ }
22
+ }
runs/Oct13_08-20-54_DESKTOP-SKBE9FB/events.out.tfevents.1728829255.DESKTOP-SKBE9FB.18160.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fb7383e6e0daa8b1ac017a9cff12f92db91b3a82c492e2a69f61977e050f106
3
+ size 15948
runs/Oct13_08-20-54_DESKTOP-SKBE9FB/events.out.tfevents.1728829799.DESKTOP-SKBE9FB.18160.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06ae50eaf7bec7fcc2c8a1d8bca50337126b3891c42b990245c21d03299f5cc3
3
+ size 405
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.09,
3
+ "train_loss": 4.409568953514099,
4
+ "train_runtime": 541.1993,
5
+ "train_samples_per_second": 25.868,
6
+ "train_steps_per_second": 0.148
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.08064516129032258,
3
+ "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-Ocular-Toxoplasmosis\\checkpoint-2",
4
+ "epoch": 29.09090909090909,
5
+ "eval_steps": 500,
6
+ "global_step": 80,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.73,
13
+ "eval_accuracy": 0.08064516129032258,
14
+ "eval_loss": 8.883430480957031,
15
+ "eval_runtime": 2.2637,
16
+ "eval_samples_per_second": 27.389,
17
+ "eval_steps_per_second": 0.884,
18
+ "step": 2
19
+ },
20
+ {
21
+ "epoch": 1.82,
22
+ "eval_accuracy": 0.08064516129032258,
23
+ "eval_loss": 8.852208137512207,
24
+ "eval_runtime": 2.3018,
25
+ "eval_samples_per_second": 26.935,
26
+ "eval_steps_per_second": 0.869,
27
+ "step": 5
28
+ },
29
+ {
30
+ "epoch": 2.91,
31
+ "eval_accuracy": 0.08064516129032258,
32
+ "eval_loss": 8.700010299682617,
33
+ "eval_runtime": 2.5761,
34
+ "eval_samples_per_second": 24.068,
35
+ "eval_steps_per_second": 0.776,
36
+ "step": 8
37
+ },
38
+ {
39
+ "epoch": 3.64,
40
+ "learning_rate": 4.8611111111111115e-05,
41
+ "loss": 8.7803,
42
+ "step": 10
43
+ },
44
+ {
45
+ "epoch": 4.0,
46
+ "eval_accuracy": 0.08064516129032258,
47
+ "eval_loss": 8.269183158874512,
48
+ "eval_runtime": 2.5329,
49
+ "eval_samples_per_second": 24.478,
50
+ "eval_steps_per_second": 0.79,
51
+ "step": 11
52
+ },
53
+ {
54
+ "epoch": 4.73,
55
+ "eval_accuracy": 0.08064516129032258,
56
+ "eval_loss": 7.88364839553833,
57
+ "eval_runtime": 2.4337,
58
+ "eval_samples_per_second": 25.475,
59
+ "eval_steps_per_second": 0.822,
60
+ "step": 13
61
+ },
62
+ {
63
+ "epoch": 5.82,
64
+ "eval_accuracy": 0.08064516129032258,
65
+ "eval_loss": 7.327876091003418,
66
+ "eval_runtime": 2.5074,
67
+ "eval_samples_per_second": 24.727,
68
+ "eval_steps_per_second": 0.798,
69
+ "step": 16
70
+ },
71
+ {
72
+ "epoch": 6.91,
73
+ "eval_accuracy": 0.08064516129032258,
74
+ "eval_loss": 6.769954204559326,
75
+ "eval_runtime": 2.6471,
76
+ "eval_samples_per_second": 23.422,
77
+ "eval_steps_per_second": 0.756,
78
+ "step": 19
79
+ },
80
+ {
81
+ "epoch": 7.27,
82
+ "learning_rate": 4.166666666666667e-05,
83
+ "loss": 7.5847,
84
+ "step": 20
85
+ },
86
+ {
87
+ "epoch": 8.0,
88
+ "eval_accuracy": 0.08064516129032258,
89
+ "eval_loss": 6.1880202293396,
90
+ "eval_runtime": 2.602,
91
+ "eval_samples_per_second": 23.828,
92
+ "eval_steps_per_second": 0.769,
93
+ "step": 22
94
+ },
95
+ {
96
+ "epoch": 8.73,
97
+ "eval_accuracy": 0.08064516129032258,
98
+ "eval_loss": 5.778294563293457,
99
+ "eval_runtime": 2.4341,
100
+ "eval_samples_per_second": 25.471,
101
+ "eval_steps_per_second": 0.822,
102
+ "step": 24
103
+ },
104
+ {
105
+ "epoch": 9.82,
106
+ "eval_accuracy": 0.08064516129032258,
107
+ "eval_loss": 5.21131706237793,
108
+ "eval_runtime": 2.3164,
109
+ "eval_samples_per_second": 26.766,
110
+ "eval_steps_per_second": 0.863,
111
+ "step": 27
112
+ },
113
+ {
114
+ "epoch": 10.91,
115
+ "learning_rate": 3.472222222222222e-05,
116
+ "loss": 5.7442,
117
+ "step": 30
118
+ },
119
+ {
120
+ "epoch": 10.91,
121
+ "eval_accuracy": 0.08064516129032258,
122
+ "eval_loss": 4.716261386871338,
123
+ "eval_runtime": 2.4233,
124
+ "eval_samples_per_second": 25.585,
125
+ "eval_steps_per_second": 0.825,
126
+ "step": 30
127
+ },
128
+ {
129
+ "epoch": 12.0,
130
+ "eval_accuracy": 0.08064516129032258,
131
+ "eval_loss": 4.264786720275879,
132
+ "eval_runtime": 2.513,
133
+ "eval_samples_per_second": 24.671,
134
+ "eval_steps_per_second": 0.796,
135
+ "step": 33
136
+ },
137
+ {
138
+ "epoch": 12.73,
139
+ "eval_accuracy": 0.08064516129032258,
140
+ "eval_loss": 3.989229202270508,
141
+ "eval_runtime": 2.4651,
142
+ "eval_samples_per_second": 25.151,
143
+ "eval_steps_per_second": 0.811,
144
+ "step": 35
145
+ },
146
+ {
147
+ "epoch": 13.82,
148
+ "eval_accuracy": 0.08064516129032258,
149
+ "eval_loss": 3.6134493350982666,
150
+ "eval_runtime": 2.6037,
151
+ "eval_samples_per_second": 23.812,
152
+ "eval_steps_per_second": 0.768,
153
+ "step": 38
154
+ },
155
+ {
156
+ "epoch": 14.55,
157
+ "learning_rate": 2.777777777777778e-05,
158
+ "loss": 4.1747,
159
+ "step": 40
160
+ },
161
+ {
162
+ "epoch": 14.91,
163
+ "eval_accuracy": 0.08064516129032258,
164
+ "eval_loss": 3.2827646732330322,
165
+ "eval_runtime": 2.687,
166
+ "eval_samples_per_second": 23.074,
167
+ "eval_steps_per_second": 0.744,
168
+ "step": 41
169
+ },
170
+ {
171
+ "epoch": 16.0,
172
+ "eval_accuracy": 0.08064516129032258,
173
+ "eval_loss": 2.9957385063171387,
174
+ "eval_runtime": 2.4174,
175
+ "eval_samples_per_second": 25.647,
176
+ "eval_steps_per_second": 0.827,
177
+ "step": 44
178
+ },
179
+ {
180
+ "epoch": 16.73,
181
+ "eval_accuracy": 0.08064516129032258,
182
+ "eval_loss": 2.825892686843872,
183
+ "eval_runtime": 2.3083,
184
+ "eval_samples_per_second": 26.86,
185
+ "eval_steps_per_second": 0.866,
186
+ "step": 46
187
+ },
188
+ {
189
+ "epoch": 17.82,
190
+ "eval_accuracy": 0.08064516129032258,
191
+ "eval_loss": 2.5987932682037354,
192
+ "eval_runtime": 2.4694,
193
+ "eval_samples_per_second": 25.107,
194
+ "eval_steps_per_second": 0.81,
195
+ "step": 49
196
+ },
197
+ {
198
+ "epoch": 18.18,
199
+ "learning_rate": 2.0833333333333336e-05,
200
+ "loss": 3.0458,
201
+ "step": 50
202
+ },
203
+ {
204
+ "epoch": 18.91,
205
+ "eval_accuracy": 0.08064516129032258,
206
+ "eval_loss": 2.400411367416382,
207
+ "eval_runtime": 2.3426,
208
+ "eval_samples_per_second": 26.467,
209
+ "eval_steps_per_second": 0.854,
210
+ "step": 52
211
+ },
212
+ {
213
+ "epoch": 20.0,
214
+ "eval_accuracy": 0.08064516129032258,
215
+ "eval_loss": 2.227222204208374,
216
+ "eval_runtime": 2.4914,
217
+ "eval_samples_per_second": 24.885,
218
+ "eval_steps_per_second": 0.803,
219
+ "step": 55
220
+ },
221
+ {
222
+ "epoch": 20.73,
223
+ "eval_accuracy": 0.08064516129032258,
224
+ "eval_loss": 2.125420331954956,
225
+ "eval_runtime": 2.3746,
226
+ "eval_samples_per_second": 26.11,
227
+ "eval_steps_per_second": 0.842,
228
+ "step": 57
229
+ },
230
+ {
231
+ "epoch": 21.82,
232
+ "learning_rate": 1.388888888888889e-05,
233
+ "loss": 2.3301,
234
+ "step": 60
235
+ },
236
+ {
237
+ "epoch": 21.82,
238
+ "eval_accuracy": 0.08064516129032258,
239
+ "eval_loss": 1.9937151670455933,
240
+ "eval_runtime": 2.4362,
241
+ "eval_samples_per_second": 25.449,
242
+ "eval_steps_per_second": 0.821,
243
+ "step": 60
244
+ },
245
+ {
246
+ "epoch": 22.91,
247
+ "eval_accuracy": 0.08064516129032258,
248
+ "eval_loss": 1.885993242263794,
249
+ "eval_runtime": 2.4078,
250
+ "eval_samples_per_second": 25.749,
251
+ "eval_steps_per_second": 0.831,
252
+ "step": 63
253
+ },
254
+ {
255
+ "epoch": 24.0,
256
+ "eval_accuracy": 0.08064516129032258,
257
+ "eval_loss": 1.8005385398864746,
258
+ "eval_runtime": 2.3561,
259
+ "eval_samples_per_second": 26.314,
260
+ "eval_steps_per_second": 0.849,
261
+ "step": 66
262
+ },
263
+ {
264
+ "epoch": 24.73,
265
+ "eval_accuracy": 0.08064516129032258,
266
+ "eval_loss": 1.7550740242004395,
267
+ "eval_runtime": 2.3863,
268
+ "eval_samples_per_second": 25.981,
269
+ "eval_steps_per_second": 0.838,
270
+ "step": 68
271
+ },
272
+ {
273
+ "epoch": 25.45,
274
+ "learning_rate": 6.944444444444445e-06,
275
+ "loss": 1.9107,
276
+ "step": 70
277
+ },
278
+ {
279
+ "epoch": 25.82,
280
+ "eval_accuracy": 0.08064516129032258,
281
+ "eval_loss": 1.7021311521530151,
282
+ "eval_runtime": 2.3225,
283
+ "eval_samples_per_second": 26.696,
284
+ "eval_steps_per_second": 0.861,
285
+ "step": 71
286
+ },
287
+ {
288
+ "epoch": 26.91,
289
+ "eval_accuracy": 0.08064516129032258,
290
+ "eval_loss": 1.6653900146484375,
291
+ "eval_runtime": 2.59,
292
+ "eval_samples_per_second": 23.939,
293
+ "eval_steps_per_second": 0.772,
294
+ "step": 74
295
+ },
296
+ {
297
+ "epoch": 28.0,
298
+ "eval_accuracy": 0.08064516129032258,
299
+ "eval_loss": 1.6433522701263428,
300
+ "eval_runtime": 2.5188,
301
+ "eval_samples_per_second": 24.615,
302
+ "eval_steps_per_second": 0.794,
303
+ "step": 77
304
+ },
305
+ {
306
+ "epoch": 28.73,
307
+ "eval_accuracy": 0.08064516129032258,
308
+ "eval_loss": 1.6361864805221558,
309
+ "eval_runtime": 2.3834,
310
+ "eval_samples_per_second": 26.013,
311
+ "eval_steps_per_second": 0.839,
312
+ "step": 79
313
+ },
314
+ {
315
+ "epoch": 29.09,
316
+ "learning_rate": 0.0,
317
+ "loss": 1.7061,
318
+ "step": 80
319
+ },
320
+ {
321
+ "epoch": 29.09,
322
+ "eval_accuracy": 0.08064516129032258,
323
+ "eval_loss": 1.6347676515579224,
324
+ "eval_runtime": 2.4175,
325
+ "eval_samples_per_second": 25.646,
326
+ "eval_steps_per_second": 0.827,
327
+ "step": 80
328
+ },
329
+ {
330
+ "epoch": 29.09,
331
+ "step": 80,
332
+ "total_flos": 3.312830060612813e+17,
333
+ "train_loss": 4.409568953514099,
334
+ "train_runtime": 541.1993,
335
+ "train_samples_per_second": 25.868,
336
+ "train_steps_per_second": 0.148
337
+ }
338
+ ],
339
+ "logging_steps": 10,
340
+ "max_steps": 80,
341
+ "num_input_tokens_seen": 0,
342
+ "num_train_epochs": 40,
343
+ "save_steps": 500,
344
+ "total_flos": 3.312830060612813e+17,
345
+ "train_batch_size": 32,
346
+ "trial_name": null,
347
+ "trial_params": null
348
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:707b23c3a5fc468561b9ad9ec6c5cb53ee88b1b9a1f9cd003dd50ee2da9987b5
3
+ size 4792