HorcruxNo13 committed on
Commit
4db333f
1 Parent(s): 5e6cd35

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
  "total_flos": 1.16237984421888e+18,
4
- "train_loss": 0.3610760450363159,
5
- "train_runtime": 974.8105,
6
- "train_samples_per_second": 15.388,
7
- "train_steps_per_second": 0.062
8
  }
 
1
  {
2
  "epoch": 15.0,
3
  "total_flos": 1.16237984421888e+18,
4
+ "train_loss": 0.475440772374471,
5
+ "train_runtime": 1200.7807,
6
+ "train_samples_per_second": 12.492,
7
+ "train_steps_per_second": 0.05
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7aff6f29850e45f9d3809502ee06082d45a2c6e2e81451cb035264406def030
3
  size 343268717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70e0c5f48e4cab99d8880a0d85047bb62b1211338a7363e01dd3f6fc698f564a
3
  size 343268717
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
  "total_flos": 1.16237984421888e+18,
4
- "train_loss": 0.3610760450363159,
5
- "train_runtime": 974.8105,
6
- "train_samples_per_second": 15.388,
7
- "train_steps_per_second": 0.062
8
  }
 
1
  {
2
  "epoch": 15.0,
3
  "total_flos": 1.16237984421888e+18,
4
+ "train_loss": 0.475440772374471,
5
+ "train_runtime": 1200.7807,
6
+ "train_samples_per_second": 12.492,
7
+ "train_steps_per_second": 0.05
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9083333333333333,
3
- "best_model_checkpoint": "vit-base-patch16-224/checkpoint-40",
4
  "epoch": 15.0,
5
  "eval_steps": 500,
6
  "global_step": 60,
@@ -10,227 +10,227 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.8666666666666667,
14
- "eval_f1_score": 0.8224224224224224,
15
- "eval_loss": 0.5036832094192505,
16
- "eval_precision": 0.8150354609929079,
17
- "eval_recall": 0.8666666666666667,
18
- "eval_runtime": 4.0852,
19
- "eval_samples_per_second": 58.748,
20
- "eval_steps_per_second": 0.979,
21
  "step": 4
22
  },
23
  {
24
  "epoch": 2.0,
25
- "eval_accuracy": 0.8708333333333333,
26
- "eval_f1_score": 0.8107089829250185,
27
- "eval_loss": 0.35003846883773804,
28
- "eval_precision": 0.7583506944444445,
29
- "eval_recall": 0.8708333333333333,
30
- "eval_runtime": 4.1025,
31
- "eval_samples_per_second": 58.501,
32
- "eval_steps_per_second": 0.975,
33
  "step": 8
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8708333333333333,
38
- "eval_f1_score": 0.8107089829250185,
39
- "eval_loss": 0.3154493272304535,
40
- "eval_precision": 0.7583506944444445,
41
- "eval_recall": 0.8708333333333333,
42
- "eval_runtime": 3.9805,
43
- "eval_samples_per_second": 60.295,
44
- "eval_steps_per_second": 1.005,
45
  "step": 12
46
  },
47
  {
48
  "epoch": 3.75,
49
  "learning_rate": 4.166666666666667e-05,
50
- "loss": 0.5284,
51
  "step": 15
52
  },
53
  {
54
  "epoch": 4.0,
55
- "eval_accuracy": 0.8833333333333333,
56
- "eval_f1_score": 0.849658648884655,
57
- "eval_loss": 0.2973836362361908,
58
- "eval_precision": 0.8659207030451664,
59
- "eval_recall": 0.8833333333333333,
60
- "eval_runtime": 3.9445,
61
- "eval_samples_per_second": 60.844,
62
- "eval_steps_per_second": 1.014,
63
  "step": 16
64
  },
65
  {
66
  "epoch": 5.0,
67
- "eval_accuracy": 0.8875,
68
- "eval_f1_score": 0.8768099547511312,
69
- "eval_loss": 0.2953941226005554,
70
- "eval_precision": 0.873125,
71
- "eval_recall": 0.8875,
72
- "eval_runtime": 4.0614,
73
- "eval_samples_per_second": 59.092,
74
- "eval_steps_per_second": 0.985,
75
  "step": 20
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.8958333333333334,
80
- "eval_f1_score": 0.8716479619238106,
81
- "eval_loss": 0.2720634639263153,
82
- "eval_precision": 0.8870833333333333,
83
- "eval_recall": 0.8958333333333334,
84
- "eval_runtime": 4.0196,
85
- "eval_samples_per_second": 59.708,
86
- "eval_steps_per_second": 0.995,
87
  "step": 24
88
  },
89
  {
90
  "epoch": 7.0,
91
- "eval_accuracy": 0.8875,
92
- "eval_f1_score": 0.852667622475749,
93
- "eval_loss": 0.26786690950393677,
94
- "eval_precision": 0.881712962962963,
95
- "eval_recall": 0.8875,
96
- "eval_runtime": 4.2691,
97
- "eval_samples_per_second": 56.218,
98
- "eval_steps_per_second": 0.937,
99
  "step": 28
100
  },
101
  {
102
  "epoch": 7.5,
103
  "learning_rate": 2.777777777777778e-05,
104
- "loss": 0.3362,
105
  "step": 30
106
  },
107
  {
108
  "epoch": 8.0,
109
- "eval_accuracy": 0.8875,
110
- "eval_f1_score": 0.852667622475749,
111
- "eval_loss": 0.26340213418006897,
112
- "eval_precision": 0.881712962962963,
113
- "eval_recall": 0.8875,
114
- "eval_runtime": 4.2103,
115
- "eval_samples_per_second": 57.002,
116
- "eval_steps_per_second": 0.95,
117
  "step": 32
118
  },
119
  {
120
  "epoch": 9.0,
121
- "eval_accuracy": 0.9041666666666667,
122
- "eval_f1_score": 0.8879374201787995,
123
- "eval_loss": 0.25068628787994385,
124
- "eval_precision": 0.8952565318162663,
125
- "eval_recall": 0.9041666666666667,
126
- "eval_runtime": 3.9517,
127
- "eval_samples_per_second": 60.733,
128
- "eval_steps_per_second": 1.012,
129
  "step": 36
130
  },
131
  {
132
  "epoch": 10.0,
133
- "eval_accuracy": 0.9083333333333333,
134
- "eval_f1_score": 0.8940810124891471,
135
- "eval_loss": 0.24386708438396454,
136
- "eval_precision": 0.9006296296296298,
137
- "eval_recall": 0.9083333333333333,
138
- "eval_runtime": 3.9449,
139
- "eval_samples_per_second": 60.839,
140
- "eval_steps_per_second": 1.014,
141
  "step": 40
142
  },
143
  {
144
  "epoch": 11.0,
145
- "eval_accuracy": 0.8916666666666667,
146
- "eval_f1_score": 0.8884444626028217,
147
- "eval_loss": 0.25889885425567627,
148
- "eval_precision": 0.8860502521300643,
149
- "eval_recall": 0.8916666666666667,
150
- "eval_runtime": 4.1005,
151
- "eval_samples_per_second": 58.529,
152
- "eval_steps_per_second": 0.975,
153
  "step": 44
154
  },
155
  {
156
  "epoch": 11.25,
157
  "learning_rate": 1.388888888888889e-05,
158
- "loss": 0.3017,
159
  "step": 45
160
  },
161
  {
162
  "epoch": 12.0,
163
- "eval_accuracy": 0.9083333333333333,
164
- "eval_f1_score": 0.9024039297513475,
165
- "eval_loss": 0.24283821880817413,
166
- "eval_precision": 0.9004925532625392,
167
- "eval_recall": 0.9083333333333333,
168
- "eval_runtime": 4.2192,
169
- "eval_samples_per_second": 56.883,
170
- "eval_steps_per_second": 0.948,
171
  "step": 48
172
  },
173
  {
174
  "epoch": 13.0,
175
- "eval_accuracy": 0.9,
176
- "eval_f1_score": 0.8970256577872201,
177
- "eval_loss": 0.2543129324913025,
178
- "eval_precision": 0.894922622152669,
179
- "eval_recall": 0.9,
180
- "eval_runtime": 4.181,
181
- "eval_samples_per_second": 57.402,
182
- "eval_steps_per_second": 0.957,
183
  "step": 52
184
  },
185
  {
186
  "epoch": 14.0,
187
- "eval_accuracy": 0.8958333333333334,
188
- "eval_f1_score": 0.8951038120948916,
189
- "eval_loss": 0.2651337683200836,
190
- "eval_precision": 0.8944246031746032,
191
- "eval_recall": 0.8958333333333334,
192
- "eval_runtime": 3.9408,
193
- "eval_samples_per_second": 60.902,
194
- "eval_steps_per_second": 1.015,
195
  "step": 56
196
  },
197
  {
198
  "epoch": 15.0,
199
  "learning_rate": 0.0,
200
- "loss": 0.278,
201
  "step": 60
202
  },
203
  {
204
  "epoch": 15.0,
205
- "eval_accuracy": 0.8958333333333334,
206
- "eval_f1_score": 0.8951038120948916,
207
- "eval_loss": 0.2636790871620178,
208
- "eval_precision": 0.8944246031746032,
209
- "eval_recall": 0.8958333333333334,
210
- "eval_runtime": 4.1679,
211
- "eval_samples_per_second": 57.584,
212
- "eval_steps_per_second": 0.96,
213
  "step": 60
214
  },
215
  {
216
  "epoch": 15.0,
217
  "step": 60,
218
  "total_flos": 1.16237984421888e+18,
219
- "train_loss": 0.3610760450363159,
220
- "train_runtime": 974.8105,
221
- "train_samples_per_second": 15.388,
222
- "train_steps_per_second": 0.062
223
  },
224
  {
225
  "epoch": 15.0,
226
- "eval_accuracy": 0.9033333333333333,
227
- "eval_f1_score": 0.888913443830571,
228
- "eval_loss": 0.24460919201374054,
229
- "eval_precision": 0.892075919335706,
230
- "eval_recall": 0.9033333333333333,
231
- "eval_runtime": 4.8461,
232
- "eval_samples_per_second": 61.906,
233
- "eval_steps_per_second": 1.032,
234
  "step": 60
235
  }
236
  ],
 
1
  {
2
+ "best_metric": 0.7916666666666666,
3
+ "best_model_checkpoint": "vit-base-patch16-224/checkpoint-52",
4
  "epoch": 15.0,
5
  "eval_steps": 500,
6
  "global_step": 60,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7333333333333333,
14
+ "eval_f1_score": 0.6280486019616454,
15
+ "eval_loss": 0.6009885668754578,
16
+ "eval_precision": 0.6725490196078432,
17
+ "eval_recall": 0.7333333333333333,
18
+ "eval_runtime": 73.0728,
19
+ "eval_samples_per_second": 3.284,
20
+ "eval_steps_per_second": 0.055,
21
  "step": 4
22
  },
23
  {
24
  "epoch": 2.0,
25
+ "eval_accuracy": 0.7375,
26
+ "eval_f1_score": 0.6302131603336423,
27
+ "eval_loss": 0.5551710724830627,
28
+ "eval_precision": 0.8066945606694561,
29
+ "eval_recall": 0.7375,
30
+ "eval_runtime": 4.1084,
31
+ "eval_samples_per_second": 58.416,
32
+ "eval_steps_per_second": 0.974,
33
  "step": 8
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.7541666666666667,
38
+ "eval_f1_score": 0.6781685795384426,
39
+ "eval_loss": 0.5449685454368591,
40
+ "eval_precision": 0.7597883597883598,
41
+ "eval_recall": 0.7541666666666667,
42
+ "eval_runtime": 4.0131,
43
+ "eval_samples_per_second": 59.804,
44
+ "eval_steps_per_second": 0.997,
45
  "step": 12
46
  },
47
  {
48
  "epoch": 3.75,
49
  "learning_rate": 4.166666666666667e-05,
50
+ "loss": 0.576,
51
  "step": 15
52
  },
53
  {
54
  "epoch": 4.0,
55
+ "eval_accuracy": 0.75,
56
+ "eval_f1_score": 0.6641114982578398,
57
+ "eval_loss": 0.5325278043746948,
58
+ "eval_precision": 0.7706552706552707,
59
+ "eval_recall": 0.75,
60
+ "eval_runtime": 4.224,
61
+ "eval_samples_per_second": 56.818,
62
+ "eval_steps_per_second": 0.947,
63
  "step": 16
64
  },
65
  {
66
  "epoch": 5.0,
67
+ "eval_accuracy": 0.75,
68
+ "eval_f1_score": 0.6899999999999998,
69
+ "eval_loss": 0.5234411954879761,
70
+ "eval_precision": 0.7232142857142857,
71
+ "eval_recall": 0.75,
72
+ "eval_runtime": 3.9713,
73
+ "eval_samples_per_second": 60.434,
74
+ "eval_steps_per_second": 1.007,
75
  "step": 20
76
  },
77
  {
78
  "epoch": 6.0,
79
+ "eval_accuracy": 0.7625,
80
+ "eval_f1_score": 0.7075837742504409,
81
+ "eval_loss": 0.511196494102478,
82
+ "eval_precision": 0.7505671326826696,
83
+ "eval_recall": 0.7625,
84
+ "eval_runtime": 3.9183,
85
+ "eval_samples_per_second": 61.251,
86
+ "eval_steps_per_second": 1.021,
87
  "step": 24
88
  },
89
  {
90
  "epoch": 7.0,
91
+ "eval_accuracy": 0.7666666666666667,
92
+ "eval_f1_score": 0.7221264707039705,
93
+ "eval_loss": 0.5081896185874939,
94
+ "eval_precision": 0.7503197108701696,
95
+ "eval_recall": 0.7666666666666667,
96
+ "eval_runtime": 4.0209,
97
+ "eval_samples_per_second": 59.688,
98
+ "eval_steps_per_second": 0.995,
99
  "step": 28
100
  },
101
  {
102
  "epoch": 7.5,
103
  "learning_rate": 2.777777777777778e-05,
104
+ "loss": 0.4876,
105
  "step": 30
106
  },
107
  {
108
  "epoch": 8.0,
109
+ "eval_accuracy": 0.7666666666666667,
110
+ "eval_f1_score": 0.7287749287749288,
111
+ "eval_loss": 0.5066744089126587,
112
+ "eval_precision": 0.746633117661155,
113
+ "eval_recall": 0.7666666666666667,
114
+ "eval_runtime": 3.9922,
115
+ "eval_samples_per_second": 60.118,
116
+ "eval_steps_per_second": 1.002,
117
  "step": 32
118
  },
119
  {
120
  "epoch": 9.0,
121
+ "eval_accuracy": 0.7791666666666667,
122
+ "eval_f1_score": 0.7528159852134981,
123
+ "eval_loss": 0.5091281533241272,
124
+ "eval_precision": 0.76231884057971,
125
+ "eval_recall": 0.7791666666666667,
126
+ "eval_runtime": 4.1183,
127
+ "eval_samples_per_second": 58.276,
128
+ "eval_steps_per_second": 0.971,
129
  "step": 36
130
  },
131
  {
132
  "epoch": 10.0,
133
+ "eval_accuracy": 0.7583333333333333,
134
+ "eval_f1_score": 0.704514442129346,
135
+ "eval_loss": 0.5022971630096436,
136
+ "eval_precision": 0.7393393393393393,
137
+ "eval_recall": 0.7583333333333333,
138
+ "eval_runtime": 3.9137,
139
+ "eval_samples_per_second": 61.323,
140
+ "eval_steps_per_second": 1.022,
141
  "step": 40
142
  },
143
  {
144
  "epoch": 11.0,
145
+ "eval_accuracy": 0.7708333333333334,
146
+ "eval_f1_score": 0.7434882865423094,
147
+ "eval_loss": 0.4910615086555481,
148
+ "eval_precision": 0.7506953593910115,
149
+ "eval_recall": 0.7708333333333334,
150
+ "eval_runtime": 3.9528,
151
+ "eval_samples_per_second": 60.716,
152
+ "eval_steps_per_second": 1.012,
153
  "step": 44
154
  },
155
  {
156
  "epoch": 11.25,
157
  "learning_rate": 1.388888888888889e-05,
158
+ "loss": 0.4379,
159
  "step": 45
160
  },
161
  {
162
  "epoch": 12.0,
163
+ "eval_accuracy": 0.7666666666666667,
164
+ "eval_f1_score": 0.7513341298287536,
165
+ "eval_loss": 0.492078959941864,
166
+ "eval_precision": 0.7487322201607916,
167
+ "eval_recall": 0.7666666666666667,
168
+ "eval_runtime": 4.01,
169
+ "eval_samples_per_second": 59.85,
170
+ "eval_steps_per_second": 0.998,
171
  "step": 48
172
  },
173
  {
174
  "epoch": 13.0,
175
+ "eval_accuracy": 0.7916666666666666,
176
+ "eval_f1_score": 0.7679595398368771,
177
+ "eval_loss": 0.4905916154384613,
178
+ "eval_precision": 0.7791738054445079,
179
+ "eval_recall": 0.7916666666666666,
180
+ "eval_runtime": 4.0131,
181
+ "eval_samples_per_second": 59.804,
182
+ "eval_steps_per_second": 0.997,
183
  "step": 52
184
  },
185
  {
186
  "epoch": 14.0,
187
+ "eval_accuracy": 0.7875,
188
+ "eval_f1_score": 0.76446353296747,
189
+ "eval_loss": 0.49192750453948975,
190
+ "eval_precision": 0.7731010452961672,
191
+ "eval_recall": 0.7875,
192
+ "eval_runtime": 4.0774,
193
+ "eval_samples_per_second": 58.86,
194
+ "eval_steps_per_second": 0.981,
195
  "step": 56
196
  },
197
  {
198
  "epoch": 15.0,
199
  "learning_rate": 0.0,
200
+ "loss": 0.4003,
201
  "step": 60
202
  },
203
  {
204
  "epoch": 15.0,
205
+ "eval_accuracy": 0.7833333333333333,
206
+ "eval_f1_score": 0.7586779214303522,
207
+ "eval_loss": 0.49290910363197327,
208
+ "eval_precision": 0.7677707976394442,
209
+ "eval_recall": 0.7833333333333333,
210
+ "eval_runtime": 3.978,
211
+ "eval_samples_per_second": 60.332,
212
+ "eval_steps_per_second": 1.006,
213
  "step": 60
214
  },
215
  {
216
  "epoch": 15.0,
217
  "step": 60,
218
  "total_flos": 1.16237984421888e+18,
219
+ "train_loss": 0.475440772374471,
220
+ "train_runtime": 1200.7807,
221
+ "train_samples_per_second": 12.492,
222
+ "train_steps_per_second": 0.05
223
  },
224
  {
225
  "epoch": 15.0,
226
+ "eval_accuracy": 0.7833333333333333,
227
+ "eval_f1_score": 0.7558981348884555,
228
+ "eval_loss": 0.4771976172924042,
229
+ "eval_precision": 0.7701923076923076,
230
+ "eval_recall": 0.7833333333333333,
231
+ "eval_runtime": 93.6348,
232
+ "eval_samples_per_second": 3.204,
233
+ "eval_steps_per_second": 0.053,
234
  "step": 60
235
  }
236
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e3f28fa49c01ce58eb2cbd98f07b83111f96e4736164481dabe0c27dccfd18c
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7a4354514de5bcf1aa51ba982ed563501930d8c01edcbecc19581c7fc1d5d8d
3
  size 4027