dq158 committed on
Commit
db962a7
1 Parent(s): b7a05aa

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d18f93b6416cb6de922b525ae2aefefd5555f3956bf539033033a8a0334866a
3
  size 3132668808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b985e3b1232efa998749dd8fe5b7ab2a5d8bb9336016065ad479125aa0f57988
3
  size 3132668808
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e04dbe6cc517d74a5eb81747881c0161660f2668ab3564ad3304a3fd6f87af59
3
  size 6265677800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b1fd08ada4aca024da973715ebb6878613e3a62c9f04e5b322729e105bd57e4
3
  size 6265677800
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:427e669d0ee683c4b12f0805ce85ad0ea605698ac777a13ff0e4e41b5b4ddf99
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e505d820e9e1be748947c6f1f77d200067095eb7bab42cfad0b1d50e89cd7f6c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3199f68c919ad9f15fb49df0b36624f234cba762e6bf2c59cdcbf6ebb2295917
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c15f3dd716eade4303f05f39a43259606574efb34071f5e07f31142c4c390d5a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -3,450 +3,336 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 34567,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01,
13
  "learning_rate": 0.0001,
14
- "loss": 3.3228,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.03,
19
- "learning_rate": 9.999994258403258e-05,
20
- "loss": 2.8639,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.04,
25
- "learning_rate": 9.99997703362622e-05,
26
- "loss": 2.8198,
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 0.06,
31
- "learning_rate": 9.999948325708443e-05,
32
- "loss": 2.7858,
33
  "step": 2000
34
  },
35
  {
36
- "epoch": 0.07,
37
- "learning_rate": 9.999908134715859e-05,
38
- "loss": 2.7422,
39
  "step": 2500
40
  },
41
  {
42
- "epoch": 0.09,
43
- "learning_rate": 9.999856460740773e-05,
44
- "loss": 2.7274,
45
  "step": 3000
46
  },
47
  {
48
- "epoch": 0.1,
49
- "learning_rate": 9.99979330390186e-05,
50
- "loss": 2.6958,
51
  "step": 3500
52
  },
53
  {
54
- "epoch": 0.12,
55
- "learning_rate": 9.999718664344171e-05,
56
- "loss": 2.6617,
57
  "step": 4000
58
  },
59
  {
60
- "epoch": 0.13,
61
- "learning_rate": 9.999632542239125e-05,
62
- "loss": 2.6747,
63
  "step": 4500
64
  },
65
  {
66
- "epoch": 0.14,
67
- "learning_rate": 9.999534937784512e-05,
68
- "loss": 2.6564,
69
  "step": 5000
70
  },
71
  {
72
- "epoch": 0.16,
73
- "learning_rate": 9.999425851204496e-05,
74
- "loss": 2.585,
75
  "step": 5500
76
  },
77
  {
78
- "epoch": 0.17,
79
- "learning_rate": 9.99930528274961e-05,
80
- "loss": 2.6385,
81
  "step": 6000
82
  },
83
  {
84
- "epoch": 0.19,
85
- "learning_rate": 9.999173232696753e-05,
86
- "loss": 2.6262,
87
  "step": 6500
88
  },
89
  {
90
- "epoch": 0.2,
91
- "learning_rate": 9.999029701349196e-05,
92
- "loss": 2.6055,
93
  "step": 7000
94
  },
95
  {
96
- "epoch": 0.22,
97
- "learning_rate": 9.998874689036583e-05,
98
- "loss": 2.5917,
99
  "step": 7500
100
  },
101
  {
102
- "epoch": 0.23,
103
- "learning_rate": 9.998708196114922e-05,
104
- "loss": 2.6162,
105
  "step": 8000
106
  },
107
  {
108
- "epoch": 0.25,
109
- "learning_rate": 9.99853022296658e-05,
110
- "loss": 2.6188,
111
  "step": 8500
112
  },
113
  {
114
- "epoch": 0.26,
115
- "learning_rate": 9.998340770000302e-05,
116
- "loss": 2.5671,
117
  "step": 9000
118
  },
119
  {
120
- "epoch": 0.27,
121
- "learning_rate": 9.998139837651193e-05,
122
- "loss": 2.5897,
123
  "step": 9500
124
  },
125
  {
126
- "epoch": 0.29,
127
- "learning_rate": 9.997927426380721e-05,
128
- "loss": 2.5414,
129
  "step": 10000
130
  },
131
  {
132
- "epoch": 0.3,
133
- "learning_rate": 9.997703536676718e-05,
134
- "loss": 2.5139,
135
  "step": 10500
136
  },
137
  {
138
- "epoch": 0.32,
139
- "learning_rate": 9.997468169053379e-05,
140
- "loss": 2.5904,
141
  "step": 11000
142
  },
143
  {
144
- "epoch": 0.33,
145
- "learning_rate": 9.997221324051255e-05,
146
- "loss": 2.6288,
147
  "step": 11500
148
  },
149
  {
150
- "epoch": 0.35,
151
- "learning_rate": 9.996963002237263e-05,
152
- "loss": 2.598,
153
  "step": 12000
154
  },
155
  {
156
- "epoch": 0.36,
157
- "learning_rate": 9.996693204204674e-05,
158
- "loss": 2.5276,
159
  "step": 12500
160
  },
161
  {
162
- "epoch": 0.38,
163
- "learning_rate": 9.996411930573117e-05,
164
- "loss": 2.5817,
165
  "step": 13000
166
  },
167
  {
168
- "epoch": 0.39,
169
- "learning_rate": 9.996119181988575e-05,
170
- "loss": 2.5316,
171
  "step": 13500
172
  },
173
  {
174
- "epoch": 0.41,
175
- "learning_rate": 9.995814959123386e-05,
176
- "loss": 2.4692,
177
  "step": 14000
178
  },
179
  {
180
- "epoch": 0.42,
181
- "learning_rate": 9.995499262676243e-05,
182
- "loss": 2.5464,
183
  "step": 14500
184
  },
185
  {
186
- "epoch": 0.43,
187
- "learning_rate": 9.99517209337218e-05,
188
- "loss": 2.5222,
189
  "step": 15000
190
  },
191
  {
192
- "epoch": 0.45,
193
- "learning_rate": 9.994833451962592e-05,
194
- "loss": 2.5304,
195
  "step": 15500
196
  },
197
  {
198
- "epoch": 0.46,
199
- "learning_rate": 9.994483339225213e-05,
200
- "loss": 2.6063,
201
  "step": 16000
202
  },
203
  {
204
- "epoch": 0.48,
205
- "learning_rate": 9.994121755964129e-05,
206
- "loss": 2.5286,
207
  "step": 16500
208
  },
209
  {
210
- "epoch": 0.49,
211
- "learning_rate": 9.993748703009764e-05,
212
- "loss": 2.5273,
213
  "step": 17000
214
  },
215
  {
216
- "epoch": 0.51,
217
- "learning_rate": 9.993364181218885e-05,
218
- "loss": 2.4868,
219
  "step": 17500
220
  },
221
  {
222
- "epoch": 0.52,
223
- "learning_rate": 9.992968191474601e-05,
224
- "loss": 2.435,
225
  "step": 18000
226
  },
227
  {
228
- "epoch": 0.54,
229
- "learning_rate": 9.992560734686357e-05,
230
- "loss": 2.484,
231
  "step": 18500
232
  },
233
  {
234
- "epoch": 0.55,
235
- "learning_rate": 9.992141811789933e-05,
236
- "loss": 2.5301,
237
  "step": 19000
238
  },
239
  {
240
- "epoch": 0.56,
241
- "learning_rate": 9.991711423747445e-05,
242
- "loss": 2.4857,
243
  "step": 19500
244
  },
245
  {
246
- "epoch": 0.58,
247
- "learning_rate": 9.991269571547339e-05,
248
- "loss": 2.4958,
249
  "step": 20000
250
  },
251
  {
252
- "epoch": 0.59,
253
- "learning_rate": 9.99081625620439e-05,
254
- "loss": 2.4757,
255
  "step": 20500
256
  },
257
  {
258
- "epoch": 0.61,
259
- "learning_rate": 9.990351478759696e-05,
260
- "loss": 2.544,
261
  "step": 21000
262
  },
263
  {
264
- "epoch": 0.62,
265
- "learning_rate": 9.989875240280689e-05,
266
- "loss": 2.4796,
267
  "step": 21500
268
  },
269
  {
270
- "epoch": 0.64,
271
- "learning_rate": 9.989387541861111e-05,
272
- "loss": 2.4968,
273
  "step": 22000
274
  },
275
  {
276
- "epoch": 0.65,
277
- "learning_rate": 9.988888384621031e-05,
278
- "loss": 2.4426,
279
  "step": 22500
280
  },
281
  {
282
- "epoch": 0.67,
283
- "learning_rate": 9.988377769706834e-05,
284
- "loss": 2.4471,
285
  "step": 23000
286
  },
287
  {
288
- "epoch": 0.68,
289
- "learning_rate": 9.987855698291218e-05,
290
- "loss": 2.5022,
291
  "step": 23500
292
  },
293
  {
294
- "epoch": 0.69,
295
- "learning_rate": 9.98732217157319e-05,
296
- "loss": 2.5202,
297
  "step": 24000
298
  },
299
  {
300
- "epoch": 0.71,
301
- "learning_rate": 9.98677719077807e-05,
302
- "loss": 2.5562,
303
  "step": 24500
304
  },
305
  {
306
- "epoch": 0.72,
307
- "learning_rate": 9.986220757157482e-05,
308
- "loss": 2.4888,
309
  "step": 25000
310
  },
311
- {
312
- "epoch": 0.74,
313
- "learning_rate": 9.985652871989352e-05,
314
- "loss": 2.5049,
315
- "step": 25500
316
- },
317
- {
318
- "epoch": 0.75,
319
- "learning_rate": 9.98507353657791e-05,
320
- "loss": 2.4664,
321
- "step": 26000
322
- },
323
- {
324
- "epoch": 0.77,
325
- "learning_rate": 9.984482752253677e-05,
326
- "loss": 2.4528,
327
- "step": 26500
328
- },
329
- {
330
- "epoch": 0.78,
331
- "learning_rate": 9.98388052037347e-05,
332
- "loss": 2.4577,
333
- "step": 27000
334
- },
335
- {
336
- "epoch": 0.8,
337
- "learning_rate": 9.983266842320402e-05,
338
- "loss": 2.4889,
339
- "step": 27500
340
- },
341
- {
342
- "epoch": 0.81,
343
- "learning_rate": 9.982641719503866e-05,
344
- "loss": 2.4272,
345
- "step": 28000
346
- },
347
- {
348
- "epoch": 0.82,
349
- "learning_rate": 9.982005153359547e-05,
350
- "loss": 2.4783,
351
- "step": 28500
352
- },
353
- {
354
- "epoch": 0.84,
355
- "learning_rate": 9.981357145349406e-05,
356
- "loss": 2.4795,
357
- "step": 29000
358
- },
359
- {
360
- "epoch": 0.85,
361
- "learning_rate": 9.98069769696168e-05,
362
- "loss": 2.4807,
363
- "step": 29500
364
- },
365
- {
366
- "epoch": 0.87,
367
- "learning_rate": 9.980026809710888e-05,
368
- "loss": 2.4951,
369
- "step": 30000
370
- },
371
- {
372
- "epoch": 0.88,
373
- "learning_rate": 9.979344485137813e-05,
374
- "loss": 2.5137,
375
- "step": 30500
376
- },
377
- {
378
- "epoch": 0.9,
379
- "learning_rate": 9.978650724809511e-05,
380
- "loss": 2.5249,
381
- "step": 31000
382
- },
383
- {
384
- "epoch": 0.91,
385
- "learning_rate": 9.977945530319297e-05,
386
- "loss": 2.4092,
387
- "step": 31500
388
- },
389
- {
390
- "epoch": 0.93,
391
- "learning_rate": 9.977228903286746e-05,
392
- "loss": 2.4978,
393
- "step": 32000
394
- },
395
- {
396
- "epoch": 0.94,
397
- "learning_rate": 9.976500845357694e-05,
398
- "loss": 2.4361,
399
- "step": 32500
400
- },
401
- {
402
- "epoch": 0.95,
403
- "learning_rate": 9.975761358204227e-05,
404
- "loss": 2.4774,
405
- "step": 33000
406
- },
407
- {
408
- "epoch": 0.97,
409
- "learning_rate": 9.975010443524679e-05,
410
- "loss": 2.4662,
411
- "step": 33500
412
- },
413
- {
414
- "epoch": 0.98,
415
- "learning_rate": 9.974248103043629e-05,
416
- "loss": 2.4252,
417
- "step": 34000
418
- },
419
- {
420
- "epoch": 1.0,
421
- "learning_rate": 9.973474338511898e-05,
422
- "loss": 2.4689,
423
- "step": 34500
424
- },
425
  {
426
  "epoch": 1.0,
427
  "eval_bleu": 1.0,
428
  "eval_brevity_penalty": 1.0,
429
  "eval_length_ratio": 1.0,
430
- "eval_loss": 2.3501155376434326,
431
  "eval_precisions": [
432
  1.0,
433
  1.0,
434
  1.0,
435
  1.0
436
  ],
437
- "eval_reference_length": 1966592,
438
- "eval_runtime": 3383.1867,
439
- "eval_samples_per_second": 1.135,
440
- "eval_steps_per_second": 1.135,
441
- "eval_translation_length": 1966592,
442
- "step": 34567
443
  }
444
  ],
445
  "logging_steps": 500,
446
- "max_steps": 1037010,
447
  "num_train_epochs": 30,
448
  "save_steps": 500,
449
- "total_flos": 7.966891375696282e+16,
450
  "trial_name": null,
451
  "trial_params": null
452
  }
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 25291,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02,
13
  "learning_rate": 0.0001,
14
+ "loss": 3.3233,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.04,
19
+ "learning_rate": 9.99998927054457e-05,
20
+ "loss": 2.8783,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 0.06,
25
+ "learning_rate": 9.999957082224324e-05,
26
+ "loss": 2.6917,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 0.08,
31
+ "learning_rate": 9.999903435177409e-05,
32
+ "loss": 2.7154,
33
  "step": 2000
34
  },
35
  {
36
+ "epoch": 0.1,
37
+ "learning_rate": 9.999828329634069e-05,
38
+ "loss": 2.7366,
39
  "step": 2500
40
  },
41
  {
42
+ "epoch": 0.12,
43
+ "learning_rate": 9.999731765916636e-05,
44
+ "loss": 2.7151,
45
  "step": 3000
46
  },
47
  {
48
+ "epoch": 0.14,
49
+ "learning_rate": 9.999613744439543e-05,
50
+ "loss": 2.6935,
51
  "step": 3500
52
  },
53
  {
54
+ "epoch": 0.16,
55
+ "learning_rate": 9.999474265709312e-05,
56
+ "loss": 2.6431,
57
  "step": 4000
58
  },
59
  {
60
+ "epoch": 0.18,
61
+ "learning_rate": 9.999313330324557e-05,
62
+ "loss": 2.6171,
63
  "step": 4500
64
  },
65
  {
66
+ "epoch": 0.2,
67
+ "learning_rate": 9.999130938975975e-05,
68
+ "loss": 2.6553,
69
  "step": 5000
70
  },
71
  {
72
+ "epoch": 0.22,
73
+ "learning_rate": 9.998927092446351e-05,
74
+ "loss": 2.5905,
75
  "step": 5500
76
  },
77
  {
78
+ "epoch": 0.24,
79
+ "learning_rate": 9.99870179161055e-05,
80
+ "loss": 2.5851,
81
  "step": 6000
82
  },
83
  {
84
+ "epoch": 0.26,
85
+ "learning_rate": 9.998455037435515e-05,
86
+ "loss": 2.7004,
87
  "step": 6500
88
  },
89
  {
90
+ "epoch": 0.28,
91
+ "learning_rate": 9.998186830980259e-05,
92
+ "loss": 2.5175,
93
  "step": 7000
94
  },
95
  {
96
+ "epoch": 0.3,
97
+ "learning_rate": 9.997897173395868e-05,
98
+ "loss": 2.5745,
99
  "step": 7500
100
  },
101
  {
102
+ "epoch": 0.32,
103
+ "learning_rate": 9.997586065925489e-05,
104
+ "loss": 2.515,
105
  "step": 8000
106
  },
107
  {
108
+ "epoch": 0.34,
109
+ "learning_rate": 9.997253509904324e-05,
110
+ "loss": 2.5784,
111
  "step": 8500
112
  },
113
  {
114
+ "epoch": 0.36,
115
+ "learning_rate": 9.996899506759634e-05,
116
+ "loss": 2.5426,
117
  "step": 9000
118
  },
119
  {
120
+ "epoch": 0.38,
121
+ "learning_rate": 9.996524058010725e-05,
122
+ "loss": 2.5677,
123
  "step": 9500
124
  },
125
  {
126
+ "epoch": 0.4,
127
+ "learning_rate": 9.996127165268938e-05,
128
+ "loss": 2.5871,
129
  "step": 10000
130
  },
131
  {
132
+ "epoch": 0.42,
133
+ "learning_rate": 9.995708830237652e-05,
134
+ "loss": 2.6239,
135
  "step": 10500
136
  },
137
  {
138
+ "epoch": 0.43,
139
+ "learning_rate": 9.995269054712269e-05,
140
+ "loss": 2.5366,
141
  "step": 11000
142
  },
143
  {
144
+ "epoch": 0.45,
145
+ "learning_rate": 9.994807840580211e-05,
146
+ "loss": 2.5484,
147
  "step": 11500
148
  },
149
  {
150
+ "epoch": 0.47,
151
+ "learning_rate": 9.994325189820907e-05,
152
+ "loss": 2.5512,
153
  "step": 12000
154
  },
155
  {
156
+ "epoch": 0.49,
157
+ "learning_rate": 9.99382110450579e-05,
158
+ "loss": 2.5573,
159
  "step": 12500
160
  },
161
  {
162
+ "epoch": 0.51,
163
+ "learning_rate": 9.993295586798282e-05,
164
+ "loss": 2.5584,
165
  "step": 13000
166
  },
167
  {
168
+ "epoch": 0.53,
169
+ "learning_rate": 9.992748638953795e-05,
170
+ "loss": 2.5176,
171
  "step": 13500
172
  },
173
  {
174
+ "epoch": 0.55,
175
+ "learning_rate": 9.992180263319706e-05,
176
+ "loss": 2.5322,
177
  "step": 14000
178
  },
179
  {
180
+ "epoch": 0.57,
181
+ "learning_rate": 9.991590462335362e-05,
182
+ "loss": 2.4881,
183
  "step": 14500
184
  },
185
  {
186
+ "epoch": 0.59,
187
+ "learning_rate": 9.990979238532059e-05,
188
+ "loss": 2.5357,
189
  "step": 15000
190
  },
191
  {
192
+ "epoch": 0.61,
193
+ "learning_rate": 9.990346594533036e-05,
194
+ "loss": 2.4982,
195
  "step": 15500
196
  },
197
  {
198
+ "epoch": 0.63,
199
+ "learning_rate": 9.989692533053464e-05,
200
+ "loss": 2.5111,
201
  "step": 16000
202
  },
203
  {
204
+ "epoch": 0.65,
205
+ "learning_rate": 9.989017056900434e-05,
206
+ "loss": 2.5439,
207
  "step": 16500
208
  },
209
  {
210
+ "epoch": 0.67,
211
+ "learning_rate": 9.988320168972938e-05,
212
+ "loss": 2.4806,
213
  "step": 17000
214
  },
215
  {
216
+ "epoch": 0.69,
217
+ "learning_rate": 9.987601872261871e-05,
218
+ "loss": 2.4715,
219
  "step": 17500
220
  },
221
  {
222
+ "epoch": 0.71,
223
+ "learning_rate": 9.986862169850007e-05,
224
+ "loss": 2.4796,
225
  "step": 18000
226
  },
227
  {
228
+ "epoch": 0.73,
229
+ "learning_rate": 9.986101064911984e-05,
230
+ "loss": 2.5061,
231
  "step": 18500
232
  },
233
  {
234
+ "epoch": 0.75,
235
+ "learning_rate": 9.985318560714301e-05,
236
+ "loss": 2.4945,
237
  "step": 19000
238
  },
239
  {
240
+ "epoch": 0.77,
241
+ "learning_rate": 9.984514660615293e-05,
242
+ "loss": 2.4611,
243
  "step": 19500
244
  },
245
  {
246
+ "epoch": 0.79,
247
+ "learning_rate": 9.983689368065128e-05,
248
+ "loss": 2.5054,
249
  "step": 20000
250
  },
251
  {
252
+ "epoch": 0.81,
253
+ "learning_rate": 9.98284268660578e-05,
254
+ "loss": 2.5272,
255
  "step": 20500
256
  },
257
  {
258
+ "epoch": 0.83,
259
+ "learning_rate": 9.981974619871019e-05,
260
+ "loss": 2.4692,
261
  "step": 21000
262
  },
263
  {
264
+ "epoch": 0.85,
265
+ "learning_rate": 9.981085171586402e-05,
266
+ "loss": 2.4757,
267
  "step": 21500
268
  },
269
  {
270
+ "epoch": 0.87,
271
+ "learning_rate": 9.980174345569246e-05,
272
+ "loss": 2.5493,
273
  "step": 22000
274
  },
275
  {
276
+ "epoch": 0.89,
277
+ "learning_rate": 9.979242145728618e-05,
278
+ "loss": 2.4543,
279
  "step": 22500
280
  },
281
  {
282
+ "epoch": 0.91,
283
+ "learning_rate": 9.978288576065315e-05,
284
+ "loss": 2.4532,
285
  "step": 23000
286
  },
287
  {
288
+ "epoch": 0.93,
289
+ "learning_rate": 9.977313640671853e-05,
290
+ "loss": 2.4311,
291
  "step": 23500
292
  },
293
  {
294
+ "epoch": 0.95,
295
+ "learning_rate": 9.97631734373244e-05,
296
+ "loss": 2.4512,
297
  "step": 24000
298
  },
299
  {
300
+ "epoch": 0.97,
301
+ "learning_rate": 9.975299689522967e-05,
302
+ "loss": 2.46,
303
  "step": 24500
304
  },
305
  {
306
+ "epoch": 0.99,
307
+ "learning_rate": 9.974260682410984e-05,
308
+ "loss": 2.5014,
309
  "step": 25000
310
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  {
312
  "epoch": 1.0,
313
  "eval_bleu": 1.0,
314
  "eval_brevity_penalty": 1.0,
315
  "eval_length_ratio": 1.0,
316
+ "eval_loss": 2.3814520835876465,
317
  "eval_precisions": [
318
  1.0,
319
  1.0,
320
  1.0,
321
  1.0
322
  ],
323
+ "eval_reference_length": 1439232,
324
+ "eval_runtime": 2544.152,
325
+ "eval_samples_per_second": 1.105,
326
+ "eval_steps_per_second": 1.105,
327
+ "eval_translation_length": 1439232,
328
+ "step": 25291
329
  }
330
  ],
331
  "logging_steps": 500,
332
+ "max_steps": 758730,
333
  "num_train_epochs": 30,
334
  "save_steps": 500,
335
+ "total_flos": 5.828988624489677e+16,
336
  "trial_name": null,
337
  "trial_params": null
338
  }