tdhcuong committed on
Commit
fbe6081
1 Parent(s): 7be56e3

End of training

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. all_results.json +11 -11
  3. eval_results.json +6 -6
  4. train_results.json +6 -6
  5. trainer_state.json +139 -65
README.md CHANGED
@@ -33,7 +33,7 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.1988
37
  - Accuracy: 0.9105
38
 
39
  ## Model description
 
33
 
34
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.2100
37
  - Accuracy: 0.9105
38
 
39
  ## Model description
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.9036144578313253,
4
- "eval_loss": 0.25001978874206543,
5
- "eval_runtime": 11.7154,
6
- "eval_samples_per_second": 49.593,
7
- "eval_steps_per_second": 1.622,
8
- "total_flos": 3.892774168288051e+17,
9
- "train_loss": 0.44857889656128924,
10
- "train_runtime": 357.7744,
11
- "train_samples_per_second": 43.771,
12
- "train_steps_per_second": 0.344
13
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9104991394148021,
4
+ "eval_loss": 0.21004654467105865,
5
+ "eval_runtime": 11.6442,
6
+ "eval_samples_per_second": 49.896,
7
+ "eval_steps_per_second": 1.632,
8
+ "total_flos": 6.487956947146752e+17,
9
+ "train_loss": 0.20639470670281387,
10
+ "train_runtime": 554.206,
11
+ "train_samples_per_second": 47.094,
12
+ "train_steps_per_second": 0.37
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.9036144578313253,
4
- "eval_loss": 0.25001978874206543,
5
- "eval_runtime": 11.7154,
6
- "eval_samples_per_second": 49.593,
7
- "eval_steps_per_second": 1.622
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9104991394148021,
4
+ "eval_loss": 0.21004654467105865,
5
+ "eval_runtime": 11.6442,
6
+ "eval_samples_per_second": 49.896,
7
+ "eval_steps_per_second": 1.632
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 3.892774168288051e+17,
4
- "train_loss": 0.44857889656128924,
5
- "train_runtime": 357.7744,
6
- "train_samples_per_second": 43.771,
7
- "train_steps_per_second": 0.344
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 6.487956947146752e+17,
4
+ "train_loss": 0.20639470670281387,
5
+ "train_runtime": 554.206,
6
+ "train_samples_per_second": 47.094,
7
+ "train_steps_per_second": 0.37
8
  }
trainer_state.json CHANGED
@@ -1,138 +1,212 @@
1
  {
2
- "best_metric": 0.9036144578313253,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-poc\\checkpoint-82",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 123,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.24390243902439024,
13
- "grad_norm": 39.657066345214844,
14
- "learning_rate": 3.846153846153846e-05,
15
- "loss": 1.4333,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.4878048780487805,
20
- "grad_norm": 23.66535758972168,
21
- "learning_rate": 4.681818181818182e-05,
22
- "loss": 0.8054,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.7317073170731707,
27
- "grad_norm": 14.953439712524414,
28
- "learning_rate": 4.2272727272727275e-05,
29
- "loss": 0.4905,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.975609756097561,
34
- "grad_norm": 21.484201431274414,
35
- "learning_rate": 3.7727272727272725e-05,
36
- "loss": 0.4233,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 1.0,
41
- "eval_accuracy": 0.8674698795180723,
42
- "eval_loss": 0.3217690885066986,
43
- "eval_runtime": 13.4934,
44
- "eval_samples_per_second": 43.058,
45
- "eval_steps_per_second": 1.408,
46
  "step": 41
47
  },
48
  {
49
  "epoch": 1.2195121951219512,
50
- "grad_norm": 10.432565689086914,
51
- "learning_rate": 3.318181818181819e-05,
52
- "loss": 0.3435,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.4634146341463414,
57
- "grad_norm": 22.239377975463867,
58
- "learning_rate": 2.863636363636364e-05,
59
- "loss": 0.3269,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.7073170731707317,
64
- "grad_norm": 14.019749641418457,
65
- "learning_rate": 2.4090909090909093e-05,
66
- "loss": 0.2853,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.951219512195122,
71
- "grad_norm": 12.378067016601562,
72
- "learning_rate": 1.9545454545454546e-05,
73
- "loss": 0.3203,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 2.0,
78
- "eval_accuracy": 0.9036144578313253,
79
- "eval_loss": 0.25001978874206543,
80
- "eval_runtime": 11.6049,
81
- "eval_samples_per_second": 50.065,
82
- "eval_steps_per_second": 1.637,
83
  "step": 82
84
  },
85
  {
86
  "epoch": 2.1951219512195124,
87
- "grad_norm": 11.0855131149292,
88
- "learning_rate": 1.5e-05,
89
- "loss": 0.2641,
90
  "step": 90
91
  },
92
  {
93
  "epoch": 2.4390243902439024,
94
- "grad_norm": 8.276205062866211,
95
- "learning_rate": 1.0454545454545455e-05,
96
- "loss": 0.2553,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.682926829268293,
101
- "grad_norm": 12.671364784240723,
102
- "learning_rate": 5.909090909090909e-06,
103
- "loss": 0.2546,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.926829268292683,
108
- "grad_norm": 14.88228988647461,
109
- "learning_rate": 1.3636363636363636e-06,
110
- "loss": 0.2418,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 3.0,
115
- "eval_accuracy": 0.8967297762478486,
116
- "eval_loss": 0.2369231879711151,
117
- "eval_runtime": 12.394,
118
- "eval_samples_per_second": 46.877,
119
- "eval_steps_per_second": 1.533,
120
  "step": 123
121
  },
122
  {
123
- "epoch": 3.0,
124
- "step": 123,
125
- "total_flos": 3.892774168288051e+17,
126
- "train_loss": 0.44857889656128924,
127
- "train_runtime": 357.7744,
128
- "train_samples_per_second": 43.771,
129
- "train_steps_per_second": 0.344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  }
131
  ],
132
  "logging_steps": 10,
133
- "max_steps": 123,
134
  "num_input_tokens_seen": 0,
135
- "num_train_epochs": 3,
136
  "save_steps": 500,
137
  "stateful_callbacks": {
138
  "TrainerControl": {
@@ -146,7 +220,7 @@
146
  "attributes": {}
147
  }
148
  },
149
- "total_flos": 3.892774168288051e+17,
150
  "train_batch_size": 32,
151
  "trial_name": null,
152
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9104991394148021,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-poc\\checkpoint-164",
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 205,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.24390243902439024,
13
+ "grad_norm": 18.683757781982422,
14
+ "learning_rate": 2.380952380952381e-05,
15
+ "loss": 0.2672,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.4878048780487805,
20
+ "grad_norm": 17.49846649169922,
21
+ "learning_rate": 4.761904761904762e-05,
22
+ "loss": 0.2349,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.7317073170731707,
27
+ "grad_norm": 11.330144882202148,
28
+ "learning_rate": 4.7554347826086956e-05,
29
+ "loss": 0.2134,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.975609756097561,
34
+ "grad_norm": 24.904529571533203,
35
+ "learning_rate": 4.483695652173913e-05,
36
+ "loss": 0.2261,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 1.0,
41
+ "eval_accuracy": 0.8984509466437177,
42
+ "eval_loss": 0.23582199215888977,
43
+ "eval_runtime": 11.0074,
44
+ "eval_samples_per_second": 52.783,
45
+ "eval_steps_per_second": 1.726,
46
  "step": 41
47
  },
48
  {
49
  "epoch": 1.2195121951219512,
50
+ "grad_norm": 8.899707794189453,
51
+ "learning_rate": 4.2119565217391304e-05,
52
+ "loss": 0.1967,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.4634146341463414,
57
+ "grad_norm": 18.724058151245117,
58
+ "learning_rate": 3.940217391304348e-05,
59
+ "loss": 0.1736,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.7073170731707317,
64
+ "grad_norm": 15.106910705566406,
65
+ "learning_rate": 3.668478260869566e-05,
66
+ "loss": 0.1903,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.951219512195122,
71
+ "grad_norm": 14.325936317443848,
72
+ "learning_rate": 3.3967391304347826e-05,
73
+ "loss": 0.2418,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 2.0,
78
+ "eval_accuracy": 0.9087779690189329,
79
+ "eval_loss": 0.21669870615005493,
80
+ "eval_runtime": 11.4581,
81
+ "eval_samples_per_second": 50.707,
82
+ "eval_steps_per_second": 1.658,
83
  "step": 82
84
  },
85
  {
86
  "epoch": 2.1951219512195124,
87
+ "grad_norm": 11.764991760253906,
88
+ "learning_rate": 3.125e-05,
89
+ "loss": 0.2116,
90
  "step": 90
91
  },
92
  {
93
  "epoch": 2.4390243902439024,
94
+ "grad_norm": 8.702162742614746,
95
+ "learning_rate": 2.8532608695652175e-05,
96
+ "loss": 0.2111,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.682926829268293,
101
+ "grad_norm": 15.251940727233887,
102
+ "learning_rate": 2.5815217391304346e-05,
103
+ "loss": 0.2149,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.926829268292683,
108
+ "grad_norm": 19.997068405151367,
109
+ "learning_rate": 2.3097826086956523e-05,
110
+ "loss": 0.2044,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 3.0,
115
+ "eval_accuracy": 0.9036144578313253,
116
+ "eval_loss": 0.20651938021183014,
117
+ "eval_runtime": 11.4071,
118
+ "eval_samples_per_second": 50.933,
119
+ "eval_steps_per_second": 1.666,
120
  "step": 123
121
  },
122
  {
123
+ "epoch": 3.1707317073170733,
124
+ "grad_norm": 19.748979568481445,
125
+ "learning_rate": 2.0380434782608694e-05,
126
+ "loss": 0.206,
127
+ "step": 130
128
+ },
129
+ {
130
+ "epoch": 3.4146341463414633,
131
+ "grad_norm": 18.73997688293457,
132
+ "learning_rate": 1.766304347826087e-05,
133
+ "loss": 0.1849,
134
+ "step": 140
135
+ },
136
+ {
137
+ "epoch": 3.658536585365854,
138
+ "grad_norm": 12.744668006896973,
139
+ "learning_rate": 1.4945652173913044e-05,
140
+ "loss": 0.1857,
141
+ "step": 150
142
+ },
143
+ {
144
+ "epoch": 3.902439024390244,
145
+ "grad_norm": 14.340224266052246,
146
+ "learning_rate": 1.2228260869565218e-05,
147
+ "loss": 0.1995,
148
+ "step": 160
149
+ },
150
+ {
151
+ "epoch": 4.0,
152
+ "eval_accuracy": 0.9104991394148021,
153
+ "eval_loss": 0.21004654467105865,
154
+ "eval_runtime": 11.0045,
155
+ "eval_samples_per_second": 52.797,
156
+ "eval_steps_per_second": 1.727,
157
+ "step": 164
158
+ },
159
+ {
160
+ "epoch": 4.146341463414634,
161
+ "grad_norm": 12.231698036193848,
162
+ "learning_rate": 9.510869565217392e-06,
163
+ "loss": 0.1822,
164
+ "step": 170
165
+ },
166
+ {
167
+ "epoch": 4.390243902439025,
168
+ "grad_norm": 18.93744468688965,
169
+ "learning_rate": 6.7934782608695655e-06,
170
+ "loss": 0.1864,
171
+ "step": 180
172
+ },
173
+ {
174
+ "epoch": 4.634146341463414,
175
+ "grad_norm": 15.003849983215332,
176
+ "learning_rate": 4.07608695652174e-06,
177
+ "loss": 0.2083,
178
+ "step": 190
179
+ },
180
+ {
181
+ "epoch": 4.878048780487805,
182
+ "grad_norm": 24.660541534423828,
183
+ "learning_rate": 1.3586956521739131e-06,
184
+ "loss": 0.1972,
185
+ "step": 200
186
+ },
187
+ {
188
+ "epoch": 5.0,
189
+ "eval_accuracy": 0.9104991394148021,
190
+ "eval_loss": 0.19883577525615692,
191
+ "eval_runtime": 12.4083,
192
+ "eval_samples_per_second": 46.823,
193
+ "eval_steps_per_second": 1.531,
194
+ "step": 205
195
+ },
196
+ {
197
+ "epoch": 5.0,
198
+ "step": 205,
199
+ "total_flos": 6.487956947146752e+17,
200
+ "train_loss": 0.20639470670281387,
201
+ "train_runtime": 554.206,
202
+ "train_samples_per_second": 47.094,
203
+ "train_steps_per_second": 0.37
204
  }
205
  ],
206
  "logging_steps": 10,
207
+ "max_steps": 205,
208
  "num_input_tokens_seen": 0,
209
+ "num_train_epochs": 5,
210
  "save_steps": 500,
211
  "stateful_callbacks": {
212
  "TrainerControl": {
 
220
  "attributes": {}
221
  }
222
  },
223
+ "total_flos": 6.487956947146752e+17,
224
  "train_batch_size": 32,
225
  "trial_name": null,
226
  "trial_params": null