:tada: update model 20230703
Browse files- config.json +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +190 -184
- training_args.bin +1 -1
config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"_commit_hash": null,
|
3 |
-
"_name_or_path": "
|
4 |
"architectures": [
|
5 |
"VisionEncoderDecoderModel"
|
6 |
],
|
|
|
1 |
{
|
2 |
"_commit_hash": null,
|
3 |
+
"_name_or_path": "epochs_10/",
|
4 |
"architectures": [
|
5 |
"VisionEncoderDecoderModel"
|
6 |
],
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6036958
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4093e367e8b29225f56e34454e7ba3592096419b9593738fca1cc2866372c58
|
3 |
size 6036958
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2233126973
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dc981772b6511fe1414a2ba29687bd62aaa3b2ac8324a14e77a0412109c3f40
|
3 |
size 2233126973
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14511
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31f264dff15f601257953d94b9d974863b70da54bcdd7f88d8dd26fa6d88f00c
|
3 |
size 14511
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70a0fed77283d69bee0d73b41955100ee1ff922fc17871087fb2cef1e4c37fa4
|
3 |
size 627
|
trainer_state.json
CHANGED
@@ -1,270 +1,276 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./
|
4 |
-
"epoch": 19.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 1.0,
|
12 |
-
"eval_cer": 0.
|
13 |
-
"eval_loss": 0.
|
14 |
-
"eval_runtime":
|
15 |
-
"eval_samples_per_second": 8.
|
16 |
-
"eval_steps_per_second": 0.
|
17 |
-
"eval_wer": 0.
|
18 |
-
"step":
|
19 |
},
|
20 |
{
|
21 |
-
"epoch":
|
22 |
-
"
|
23 |
-
"
|
24 |
-
"
|
25 |
-
"eval_samples_per_second": 8.42,
|
26 |
-
"eval_steps_per_second": 0.526,
|
27 |
-
"eval_wer": 0.2646600230492848,
|
28 |
-
"step": 491
|
29 |
},
|
30 |
{
|
31 |
-
"epoch": 2.
|
32 |
-
"
|
33 |
-
"
|
34 |
-
"
|
|
|
|
|
|
|
|
|
35 |
},
|
36 |
{
|
37 |
"epoch": 3.0,
|
38 |
-
"eval_cer": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_runtime":
|
41 |
-
"eval_samples_per_second": 8.
|
42 |
-
"eval_steps_per_second": 0.
|
43 |
-
"eval_wer": 0.
|
44 |
-
"step":
|
45 |
},
|
46 |
{
|
47 |
-
"epoch":
|
48 |
-
"
|
49 |
-
"
|
50 |
-
"
|
51 |
-
"eval_samples_per_second": 8.21,
|
52 |
-
"eval_steps_per_second": 0.513,
|
53 |
-
"eval_wer": 0.22676428716697172,
|
54 |
-
"step": 982
|
55 |
},
|
56 |
{
|
57 |
-
"epoch": 4.
|
58 |
-
"
|
59 |
-
"
|
60 |
-
"
|
|
|
|
|
|
|
|
|
61 |
},
|
62 |
{
|
63 |
"epoch": 5.0,
|
64 |
-
"eval_cer": 0.
|
65 |
-
"eval_loss": 0.
|
66 |
-
"eval_runtime":
|
67 |
-
"eval_samples_per_second": 8.
|
68 |
"eval_steps_per_second": 0.52,
|
69 |
-
"eval_wer": 0.
|
70 |
-
"step":
|
71 |
},
|
72 |
{
|
73 |
-
"epoch":
|
74 |
-
"
|
75 |
-
"
|
76 |
-
"
|
77 |
-
"eval_samples_per_second": 8.229,
|
78 |
-
"eval_steps_per_second": 0.514,
|
79 |
-
"eval_wer": 0.21964612568639413,
|
80 |
-
"step": 1473
|
81 |
},
|
82 |
{
|
83 |
-
"epoch": 6.
|
84 |
-
"
|
85 |
-
"
|
86 |
-
"
|
|
|
|
|
|
|
|
|
87 |
},
|
88 |
{
|
89 |
"epoch": 7.0,
|
90 |
-
"eval_cer": 0.
|
91 |
-
"eval_loss": 0.
|
92 |
-
"eval_runtime":
|
93 |
-
"eval_samples_per_second": 8.
|
94 |
-
"eval_steps_per_second": 0.
|
95 |
-
"eval_wer": 0.
|
96 |
-
"step":
|
97 |
},
|
98 |
{
|
99 |
-
"epoch":
|
100 |
-
"
|
101 |
-
"
|
102 |
-
"
|
103 |
-
"eval_samples_per_second": 8.3,
|
104 |
-
"eval_steps_per_second": 0.519,
|
105 |
-
"eval_wer": 0.20893498745847738,
|
106 |
-
"step": 1964
|
107 |
},
|
108 |
{
|
109 |
-
"epoch": 8.
|
110 |
-
"
|
111 |
-
"
|
112 |
-
"
|
|
|
|
|
|
|
|
|
113 |
},
|
114 |
{
|
115 |
"epoch": 9.0,
|
116 |
-
"eval_cer": 0.
|
117 |
-
"eval_loss": 0.
|
118 |
-
"eval_runtime":
|
119 |
-
"eval_samples_per_second": 8.
|
120 |
-
"eval_steps_per_second": 0.
|
121 |
-
"eval_wer": 0.
|
122 |
-
"step":
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
},
|
124 |
{
|
125 |
"epoch": 10.0,
|
126 |
-
"eval_cer": 0.
|
127 |
-
"eval_loss": 0.
|
128 |
-
"eval_runtime":
|
129 |
-
"eval_samples_per_second": 8.
|
130 |
-
"eval_steps_per_second": 0.
|
131 |
-
"eval_wer": 0.
|
132 |
-
"step":
|
133 |
},
|
134 |
{
|
135 |
-
"epoch": 10.
|
136 |
-
"learning_rate": 1.
|
137 |
-
"loss": 0.
|
138 |
-
"step":
|
139 |
},
|
140 |
{
|
141 |
"epoch": 11.0,
|
142 |
-
"eval_cer": 0.
|
143 |
-
"eval_loss": 0.
|
144 |
-
"eval_runtime":
|
145 |
-
"eval_samples_per_second": 8.
|
146 |
-
"eval_steps_per_second": 0.
|
147 |
-
"eval_wer": 0.
|
148 |
-
"step":
|
149 |
},
|
150 |
{
|
151 |
"epoch": 12.0,
|
152 |
-
"eval_cer": 0.
|
153 |
-
"eval_loss": 0.
|
154 |
-
"eval_runtime":
|
155 |
-
"eval_samples_per_second": 8.
|
156 |
-
"eval_steps_per_second": 0.
|
157 |
-
"eval_wer": 0.
|
158 |
-
"step":
|
159 |
},
|
160 |
{
|
161 |
-
"epoch": 12.
|
162 |
-
"learning_rate": 1.
|
163 |
-
"loss": 0.
|
164 |
-
"step":
|
165 |
},
|
166 |
{
|
167 |
"epoch": 13.0,
|
168 |
-
"eval_cer": 0.
|
169 |
-
"eval_loss": 0.
|
170 |
-
"eval_runtime":
|
171 |
-
"eval_samples_per_second": 8.
|
172 |
-
"eval_steps_per_second": 0.
|
173 |
-
"eval_wer": 0.
|
174 |
-
"step":
|
175 |
},
|
176 |
{
|
177 |
"epoch": 14.0,
|
178 |
-
"eval_cer": 0.
|
179 |
-
"eval_loss": 0.
|
180 |
-
"eval_runtime":
|
181 |
-
"eval_samples_per_second": 8.
|
182 |
"eval_steps_per_second": 0.519,
|
183 |
-
"eval_wer": 0.
|
184 |
-
"step":
|
185 |
},
|
186 |
{
|
187 |
-
"epoch": 14.
|
188 |
-
"learning_rate": 1.
|
189 |
-
"loss": 0.
|
190 |
-
"step":
|
191 |
},
|
192 |
{
|
193 |
"epoch": 15.0,
|
194 |
-
"eval_cer": 0.
|
195 |
-
"eval_loss": 0.
|
196 |
-
"eval_runtime":
|
197 |
-
"eval_samples_per_second": 8.
|
198 |
-
"eval_steps_per_second": 0.
|
199 |
-
"eval_wer": 0.
|
200 |
-
"step":
|
201 |
},
|
202 |
{
|
203 |
"epoch": 16.0,
|
204 |
-
"eval_cer": 0.
|
205 |
-
"eval_loss": 0.
|
206 |
-
"eval_runtime":
|
207 |
-
"eval_samples_per_second": 8.
|
208 |
-
"eval_steps_per_second": 0.
|
209 |
-
"eval_wer": 0.
|
210 |
-
"step":
|
211 |
},
|
212 |
{
|
213 |
-
"epoch": 16.
|
214 |
-
"learning_rate": 7.
|
215 |
-
"loss": 0.
|
216 |
-
"step":
|
217 |
},
|
218 |
{
|
219 |
"epoch": 17.0,
|
220 |
-
"eval_cer": 0.
|
221 |
-
"eval_loss": 0.
|
222 |
-
"eval_runtime":
|
223 |
-
"eval_samples_per_second": 8.
|
224 |
-
"eval_steps_per_second": 0.
|
225 |
-
"eval_wer": 0.
|
226 |
-
"step":
|
227 |
},
|
228 |
{
|
229 |
"epoch": 18.0,
|
230 |
-
"eval_cer": 0.
|
231 |
-
"eval_loss": 0.
|
232 |
-
"eval_runtime":
|
233 |
-
"eval_samples_per_second": 8.
|
234 |
"eval_steps_per_second": 0.519,
|
235 |
-
"eval_wer": 0.
|
236 |
-
"step":
|
237 |
},
|
238 |
{
|
239 |
-
"epoch": 18.
|
240 |
-
"learning_rate": 3.
|
241 |
-
"loss": 0.
|
242 |
-
"step":
|
243 |
},
|
244 |
{
|
245 |
"epoch": 19.0,
|
246 |
-
"eval_cer": 0.
|
247 |
-
"eval_loss": 0.
|
248 |
-
"eval_runtime":
|
249 |
-
"eval_samples_per_second": 8.
|
250 |
-
"eval_steps_per_second": 0.
|
251 |
-
"eval_wer": 0.
|
252 |
-
"step":
|
253 |
},
|
254 |
{
|
255 |
-
"epoch": 19.
|
256 |
-
"eval_cer": 0.
|
257 |
-
"eval_loss": 0.
|
258 |
-
"eval_runtime":
|
259 |
-
"eval_samples_per_second": 8.
|
260 |
-
"eval_steps_per_second": 0.
|
261 |
-
"eval_wer": 0.
|
262 |
-
"step":
|
263 |
}
|
264 |
],
|
265 |
-
"max_steps":
|
266 |
"num_train_epochs": 20,
|
267 |
-
"total_flos":
|
268 |
"trial_name": null,
|
269 |
"trial_params": null
|
270 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5421165227890015,
|
3 |
+
"best_model_checkpoint": "./20230701_models/checkpoint-1099",
|
4 |
+
"epoch": 19.94540491355778,
|
5 |
+
"global_step": 5480,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 1.0,
|
12 |
+
"eval_cer": 0.10819683151455398,
|
13 |
+
"eval_loss": 0.6883996725082397,
|
14 |
+
"eval_runtime": 259.3566,
|
15 |
+
"eval_samples_per_second": 8.143,
|
16 |
+
"eval_steps_per_second": 0.509,
|
17 |
+
"eval_wer": 0.26681723843242583,
|
18 |
+
"step": 274
|
19 |
},
|
20 |
{
|
21 |
+
"epoch": 1.82,
|
22 |
+
"learning_rate": 3.635766423357665e-05,
|
23 |
+
"loss": 0.9397,
|
24 |
+
"step": 500
|
|
|
|
|
|
|
|
|
25 |
},
|
26 |
{
|
27 |
+
"epoch": 2.0,
|
28 |
+
"eval_cer": 0.08392901158491496,
|
29 |
+
"eval_loss": 0.5709623694419861,
|
30 |
+
"eval_runtime": 251.7707,
|
31 |
+
"eval_samples_per_second": 8.389,
|
32 |
+
"eval_steps_per_second": 0.524,
|
33 |
+
"eval_wer": 0.2248809669149066,
|
34 |
+
"step": 549
|
35 |
},
|
36 |
{
|
37 |
"epoch": 3.0,
|
38 |
+
"eval_cer": 0.07773321083648912,
|
39 |
+
"eval_loss": 0.5516401529312134,
|
40 |
+
"eval_runtime": 253.1328,
|
41 |
+
"eval_samples_per_second": 8.343,
|
42 |
+
"eval_steps_per_second": 0.521,
|
43 |
+
"eval_wer": 0.20919301672567453,
|
44 |
+
"step": 824
|
45 |
},
|
46 |
{
|
47 |
+
"epoch": 3.64,
|
48 |
+
"learning_rate": 3.270802919708029e-05,
|
49 |
+
"loss": 0.2569,
|
50 |
+
"step": 1000
|
|
|
|
|
|
|
|
|
51 |
},
|
52 |
{
|
53 |
+
"epoch": 4.0,
|
54 |
+
"eval_cer": 0.07259058417549914,
|
55 |
+
"eval_loss": 0.5421165227890015,
|
56 |
+
"eval_runtime": 253.562,
|
57 |
+
"eval_samples_per_second": 8.329,
|
58 |
+
"eval_steps_per_second": 0.521,
|
59 |
+
"eval_wer": 0.20198998901233062,
|
60 |
+
"step": 1099
|
61 |
},
|
62 |
{
|
63 |
"epoch": 5.0,
|
64 |
+
"eval_cer": 0.07377820601877787,
|
65 |
+
"eval_loss": 0.5494938492774963,
|
66 |
+
"eval_runtime": 254.0497,
|
67 |
+
"eval_samples_per_second": 8.313,
|
68 |
"eval_steps_per_second": 0.52,
|
69 |
+
"eval_wer": 0.2014406055426688,
|
70 |
+
"step": 1373
|
71 |
},
|
72 |
{
|
73 |
+
"epoch": 5.46,
|
74 |
+
"learning_rate": 2.9058394160583945e-05,
|
75 |
+
"loss": 0.109,
|
76 |
+
"step": 1500
|
|
|
|
|
|
|
|
|
77 |
},
|
78 |
{
|
79 |
+
"epoch": 6.0,
|
80 |
+
"eval_cer": 0.07154861406771686,
|
81 |
+
"eval_loss": 0.5665440559387207,
|
82 |
+
"eval_runtime": 253.9146,
|
83 |
+
"eval_samples_per_second": 8.318,
|
84 |
+
"eval_steps_per_second": 0.52,
|
85 |
+
"eval_wer": 0.1971676230008546,
|
86 |
+
"step": 1648
|
87 |
},
|
88 |
{
|
89 |
"epoch": 7.0,
|
90 |
+
"eval_cer": 0.07226566876554552,
|
91 |
+
"eval_loss": 0.5868140459060669,
|
92 |
+
"eval_runtime": 253.2966,
|
93 |
+
"eval_samples_per_second": 8.338,
|
94 |
+
"eval_steps_per_second": 0.521,
|
95 |
+
"eval_wer": 0.1968624099621536,
|
96 |
+
"step": 1923
|
97 |
},
|
98 |
{
|
99 |
+
"epoch": 7.28,
|
100 |
+
"learning_rate": 2.5408759124087593e-05,
|
101 |
+
"loss": 0.0481,
|
102 |
+
"step": 2000
|
|
|
|
|
|
|
|
|
103 |
},
|
104 |
{
|
105 |
+
"epoch": 8.0,
|
106 |
+
"eval_cer": 0.07029376834651668,
|
107 |
+
"eval_loss": 0.5920885801315308,
|
108 |
+
"eval_runtime": 252.718,
|
109 |
+
"eval_samples_per_second": 8.357,
|
110 |
+
"eval_steps_per_second": 0.522,
|
111 |
+
"eval_wer": 0.19570260041508974,
|
112 |
+
"step": 2198
|
113 |
},
|
114 |
{
|
115 |
"epoch": 9.0,
|
116 |
+
"eval_cer": 0.07013691263136666,
|
117 |
+
"eval_loss": 0.5932603478431702,
|
118 |
+
"eval_runtime": 254.5349,
|
119 |
+
"eval_samples_per_second": 8.297,
|
120 |
+
"eval_steps_per_second": 0.519,
|
121 |
+
"eval_wer": 0.19374923696740326,
|
122 |
+
"step": 2472
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"epoch": 9.1,
|
126 |
+
"learning_rate": 2.1759124087591242e-05,
|
127 |
+
"loss": 0.022,
|
128 |
+
"step": 2500
|
129 |
},
|
130 |
{
|
131 |
"epoch": 10.0,
|
132 |
+
"eval_cer": 0.06856835547986645,
|
133 |
+
"eval_loss": 0.6067739725112915,
|
134 |
+
"eval_runtime": 253.5231,
|
135 |
+
"eval_samples_per_second": 8.331,
|
136 |
+
"eval_steps_per_second": 0.521,
|
137 |
+
"eval_wer": 0.1925894274203394,
|
138 |
+
"step": 2747
|
139 |
},
|
140 |
{
|
141 |
+
"epoch": 10.92,
|
142 |
+
"learning_rate": 1.8116788321167883e-05,
|
143 |
+
"loss": 0.0108,
|
144 |
+
"step": 3000
|
145 |
},
|
146 |
{
|
147 |
"epoch": 11.0,
|
148 |
+
"eval_cer": 0.06744795751450916,
|
149 |
+
"eval_loss": 0.6042998433113098,
|
150 |
+
"eval_runtime": 253.2695,
|
151 |
+
"eval_samples_per_second": 8.339,
|
152 |
+
"eval_steps_per_second": 0.521,
|
153 |
+
"eval_wer": 0.1902087657184715,
|
154 |
+
"step": 3022
|
155 |
},
|
156 |
{
|
157 |
"epoch": 12.0,
|
158 |
+
"eval_cer": 0.06674210679633406,
|
159 |
+
"eval_loss": 0.6130247116088867,
|
160 |
+
"eval_runtime": 253.5585,
|
161 |
+
"eval_samples_per_second": 8.329,
|
162 |
+
"eval_steps_per_second": 0.521,
|
163 |
+
"eval_wer": 0.18856061530948603,
|
164 |
+
"step": 3297
|
165 |
},
|
166 |
{
|
167 |
+
"epoch": 12.74,
|
168 |
+
"learning_rate": 1.4467153284671533e-05,
|
169 |
+
"loss": 0.0052,
|
170 |
+
"step": 3500
|
171 |
},
|
172 |
{
|
173 |
"epoch": 13.0,
|
174 |
+
"eval_cer": 0.0659802361798911,
|
175 |
+
"eval_loss": 0.6161753535270691,
|
176 |
+
"eval_runtime": 254.095,
|
177 |
+
"eval_samples_per_second": 8.312,
|
178 |
+
"eval_steps_per_second": 0.519,
|
179 |
+
"eval_wer": 0.1861799536076181,
|
180 |
+
"step": 3571
|
181 |
},
|
182 |
{
|
183 |
"epoch": 14.0,
|
184 |
+
"eval_cer": 0.06466937056042306,
|
185 |
+
"eval_loss": 0.6252880692481995,
|
186 |
+
"eval_runtime": 254.0952,
|
187 |
+
"eval_samples_per_second": 8.312,
|
188 |
"eval_steps_per_second": 0.519,
|
189 |
+
"eval_wer": 0.18471493102185324,
|
190 |
+
"step": 3846
|
191 |
},
|
192 |
{
|
193 |
+
"epoch": 14.56,
|
194 |
+
"learning_rate": 1.0817518248175182e-05,
|
195 |
+
"loss": 0.0028,
|
196 |
+
"step": 4000
|
197 |
},
|
198 |
{
|
199 |
"epoch": 15.0,
|
200 |
+
"eval_cer": 0.06428843525220158,
|
201 |
+
"eval_loss": 0.6240524649620056,
|
202 |
+
"eval_runtime": 253.6298,
|
203 |
+
"eval_samples_per_second": 8.327,
|
204 |
+
"eval_steps_per_second": 0.52,
|
205 |
+
"eval_wer": 0.18270052496642655,
|
206 |
+
"step": 4121
|
207 |
},
|
208 |
{
|
209 |
"epoch": 16.0,
|
210 |
+
"eval_cer": 0.06460214668250162,
|
211 |
+
"eval_loss": 0.6327393054962158,
|
212 |
+
"eval_runtime": 254.0581,
|
213 |
+
"eval_samples_per_second": 8.313,
|
214 |
+
"eval_steps_per_second": 0.52,
|
215 |
+
"eval_wer": 0.18428763276767182,
|
216 |
+
"step": 4396
|
217 |
},
|
218 |
{
|
219 |
+
"epoch": 16.38,
|
220 |
+
"learning_rate": 7.167883211678832e-06,
|
221 |
+
"loss": 0.0016,
|
222 |
+
"step": 4500
|
223 |
},
|
224 |
{
|
225 |
"epoch": 17.0,
|
226 |
+
"eval_cer": 0.0632464651444193,
|
227 |
+
"eval_loss": 0.6320570707321167,
|
228 |
+
"eval_runtime": 253.728,
|
229 |
+
"eval_samples_per_second": 8.324,
|
230 |
+
"eval_steps_per_second": 0.52,
|
231 |
+
"eval_wer": 0.18190697106580392,
|
232 |
+
"step": 4670
|
233 |
},
|
234 |
{
|
235 |
"epoch": 18.0,
|
236 |
+
"eval_cer": 0.06377305218813722,
|
237 |
+
"eval_loss": 0.6347479224205017,
|
238 |
+
"eval_runtime": 254.3801,
|
239 |
+
"eval_samples_per_second": 8.303,
|
240 |
"eval_steps_per_second": 0.519,
|
241 |
+
"eval_wer": 0.18239531192772554,
|
242 |
+
"step": 4945
|
243 |
},
|
244 |
{
|
245 |
+
"epoch": 18.2,
|
246 |
+
"learning_rate": 3.5182481751824822e-06,
|
247 |
+
"loss": 0.0011,
|
248 |
+
"step": 5000
|
249 |
},
|
250 |
{
|
251 |
"epoch": 19.0,
|
252 |
+
"eval_cer": 0.06367221637125507,
|
253 |
+
"eval_loss": 0.6383982300758362,
|
254 |
+
"eval_runtime": 254.2837,
|
255 |
+
"eval_samples_per_second": 8.306,
|
256 |
+
"eval_steps_per_second": 0.519,
|
257 |
+
"eval_wer": 0.1815407154193627,
|
258 |
+
"step": 5220
|
259 |
},
|
260 |
{
|
261 |
+
"epoch": 19.95,
|
262 |
+
"eval_cer": 0.06354897259506577,
|
263 |
+
"eval_loss": 0.6386234164237976,
|
264 |
+
"eval_runtime": 253.9317,
|
265 |
+
"eval_samples_per_second": 8.317,
|
266 |
+
"eval_steps_per_second": 0.52,
|
267 |
+
"eval_wer": 0.18233426931998534,
|
268 |
+
"step": 5480
|
269 |
}
|
270 |
],
|
271 |
+
"max_steps": 5480,
|
272 |
"num_train_epochs": 20,
|
273 |
+
"total_flos": 9.42327114459343e+20,
|
274 |
"trial_name": null,
|
275 |
"trial_params": null
|
276 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71d19633df0be7b388fccb41f0bcfa3f9e1a5625377ced62c9270cfe649830c4
|
3 |
size 4027
|