dq158 commited on
Commit
15262e0
1 Parent(s): 8f800e5

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -16,8 +16,8 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "v",
20
- "q"
21
  ],
22
  "task_type": "SEQ_2_SEQ_LM"
23
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "q",
20
+ "v"
21
  ],
22
  "task_type": "SEQ_2_SEQ_LM"
23
  }
last-checkpoint/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 0,
3
+ "eos_token_id": 1,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.35.1"
6
+ }
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bcc4bfa26449a669fcf4e2bd2006218fa37d9141b3c0e6ed1720dba59a9fd65
3
- size 2622266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:429697b20841c0a9fd079b1a74b4ff888b57e7702cf5c31672e056d2205b537b
3
+ size 1256
last-checkpoint/rng_state.pth CHANGED
Binary files a/last-checkpoint/rng_state.pth and b/last-checkpoint/rng_state.pth differ
 
last-checkpoint/scheduler.pt CHANGED
Binary files a/last-checkpoint/scheduler.pt and b/last-checkpoint/scheduler.pt differ
 
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.995251417160034,
3
- "best_model_checkpoint": "dq158/coqui/checkpoint-201740",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 201740,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,2522 +11,508 @@
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 8e-05,
14
- "loss": 4.5486,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.02,
19
- "learning_rate": 7.999996629129878e-05,
20
- "loss": 3.9512,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.04,
25
- "learning_rate": 7.999986516525194e-05,
26
- "loss": 3.7313,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.05,
31
- "learning_rate": 7.999969662202991e-05,
32
- "loss": 3.6577,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.06,
37
- "learning_rate": 7.999946066191677e-05,
38
- "loss": 3.5469,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.07,
43
- "learning_rate": 7.999915728531019e-05,
44
- "loss": 3.7429,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.09,
49
- "learning_rate": 7.999878649272152e-05,
50
- "loss": 3.5995,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.1,
55
- "learning_rate": 7.99983482847757e-05,
56
- "loss": 3.4906,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.11,
61
- "learning_rate": 7.999784266221129e-05,
62
- "loss": 3.6012,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.12,
67
- "learning_rate": 7.99972696258805e-05,
68
- "loss": 3.4275,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.14,
73
- "learning_rate": 7.999662917674914e-05,
74
- "loss": 3.5431,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.15,
79
- "learning_rate": 7.999592131589664e-05,
80
- "loss": 3.4628,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 0.16,
85
- "learning_rate": 7.999514604451606e-05,
86
- "loss": 3.5663,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 0.17,
91
- "learning_rate": 7.999430336391406e-05,
92
- "loss": 3.4909,
93
  "step": 7000
94
  },
95
  {
96
  "epoch": 0.19,
97
- "learning_rate": 7.999339327551093e-05,
98
- "loss": 3.4715,
99
  "step": 7500
100
  },
101
  {
102
  "epoch": 0.2,
103
- "learning_rate": 7.999241578084059e-05,
104
- "loss": 3.4117,
105
  "step": 8000
106
  },
107
  {
108
  "epoch": 0.21,
109
- "learning_rate": 7.999137088155049e-05,
110
- "loss": 3.5166,
111
  "step": 8500
112
  },
113
  {
114
  "epoch": 0.22,
115
- "learning_rate": 7.999025857940177e-05,
116
- "loss": 3.4116,
117
  "step": 9000
118
  },
119
  {
120
  "epoch": 0.24,
121
- "learning_rate": 7.998907887626916e-05,
122
- "loss": 3.4845,
123
  "step": 9500
124
  },
125
  {
126
  "epoch": 0.25,
127
- "learning_rate": 7.998783177414093e-05,
128
- "loss": 3.5398,
129
  "step": 10000
130
  },
131
  {
132
  "epoch": 0.26,
133
- "learning_rate": 7.998651727511902e-05,
134
- "loss": 3.4251,
135
  "step": 10500
136
  },
137
  {
138
  "epoch": 0.27,
139
- "learning_rate": 7.998513538141894e-05,
140
- "loss": 3.4268,
141
  "step": 11000
142
  },
143
  {
144
  "epoch": 0.29,
145
- "learning_rate": 7.998368609536976e-05,
146
- "loss": 3.4133,
147
  "step": 11500
148
  },
149
  {
150
  "epoch": 0.3,
151
- "learning_rate": 7.998216941941417e-05,
152
- "loss": 3.4393,
153
  "step": 12000
154
  },
155
  {
156
  "epoch": 0.31,
157
- "learning_rate": 7.998058535610842e-05,
158
- "loss": 3.3059,
159
  "step": 12500
160
  },
161
  {
162
  "epoch": 0.32,
163
- "learning_rate": 7.997893390812236e-05,
164
- "loss": 3.3631,
165
  "step": 13000
166
  },
167
  {
168
- "epoch": 0.33,
169
- "learning_rate": 7.997721507823938e-05,
170
- "loss": 3.391,
171
  "step": 13500
172
  },
173
  {
174
  "epoch": 0.35,
175
- "learning_rate": 7.997542886935647e-05,
176
- "loss": 3.3573,
177
  "step": 14000
178
  },
179
  {
180
  "epoch": 0.36,
181
- "learning_rate": 7.997357528448417e-05,
182
- "loss": 3.3698,
183
  "step": 14500
184
  },
185
  {
186
  "epoch": 0.37,
187
- "learning_rate": 7.997165432674656e-05,
188
- "loss": 3.2463,
189
  "step": 15000
190
  },
191
  {
192
- "epoch": 0.38,
193
- "learning_rate": 7.996966599938132e-05,
194
- "loss": 3.3657,
195
  "step": 15500
196
  },
197
  {
198
  "epoch": 0.4,
199
- "learning_rate": 7.996761030573961e-05,
200
- "loss": 3.404,
201
  "step": 16000
202
  },
203
  {
204
  "epoch": 0.41,
205
- "learning_rate": 7.99654872492862e-05,
206
- "loss": 3.3893,
207
  "step": 16500
208
  },
209
  {
210
  "epoch": 0.42,
211
- "learning_rate": 7.996329683359935e-05,
212
- "loss": 3.3423,
213
  "step": 17000
214
  },
215
  {
216
- "epoch": 0.43,
217
- "learning_rate": 7.996103906237084e-05,
218
- "loss": 3.238,
219
  "step": 17500
220
  },
221
  {
222
  "epoch": 0.45,
223
- "learning_rate": 7.995871393940603e-05,
224
- "loss": 3.2994,
225
  "step": 18000
226
  },
227
  {
228
  "epoch": 0.46,
229
- "learning_rate": 7.995632146862375e-05,
230
- "loss": 3.2948,
231
  "step": 18500
232
  },
233
  {
234
  "epoch": 0.47,
235
- "learning_rate": 7.995386165405639e-05,
236
- "loss": 3.2603,
237
  "step": 19000
238
  },
239
  {
240
- "epoch": 0.48,
241
- "learning_rate": 7.995133449984973e-05,
242
- "loss": 3.3748,
243
  "step": 19500
244
  },
245
  {
246
  "epoch": 0.5,
247
- "learning_rate": 7.99487400102632e-05,
248
- "loss": 3.3827,
249
  "step": 20000
250
  },
251
  {
252
  "epoch": 0.51,
253
- "learning_rate": 7.99460781896696e-05,
254
- "loss": 3.2538,
255
  "step": 20500
256
  },
257
  {
258
  "epoch": 0.52,
259
- "learning_rate": 7.994334904255528e-05,
260
- "loss": 3.2942,
261
  "step": 21000
262
  },
263
  {
264
- "epoch": 0.53,
265
- "learning_rate": 7.994055257352003e-05,
266
- "loss": 3.439,
267
  "step": 21500
268
  },
269
  {
270
  "epoch": 0.55,
271
- "learning_rate": 7.993768878727711e-05,
272
- "loss": 3.2844,
273
  "step": 22000
274
  },
275
  {
276
  "epoch": 0.56,
277
- "learning_rate": 7.993475768865324e-05,
278
- "loss": 3.4043,
279
  "step": 22500
280
  },
281
  {
282
  "epoch": 0.57,
283
- "learning_rate": 7.993175928258863e-05,
284
- "loss": 3.2896,
285
  "step": 23000
286
  },
287
  {
288
- "epoch": 0.58,
289
- "learning_rate": 7.992869357413687e-05,
290
- "loss": 3.3379,
291
  "step": 23500
292
  },
293
  {
294
- "epoch": 0.59,
295
- "learning_rate": 7.992556056846502e-05,
296
- "loss": 3.3279,
297
  "step": 24000
298
  },
299
  {
300
  "epoch": 0.61,
301
- "learning_rate": 7.992236027085355e-05,
302
- "loss": 3.3186,
303
  "step": 24500
304
  },
305
  {
306
  "epoch": 0.62,
307
- "learning_rate": 7.991909268669638e-05,
308
- "loss": 3.2858,
309
  "step": 25000
310
  },
311
  {
312
  "epoch": 0.63,
313
- "learning_rate": 7.991575782150078e-05,
314
- "loss": 3.366,
315
  "step": 25500
316
  },
317
  {
318
- "epoch": 0.64,
319
- "learning_rate": 7.991235568088746e-05,
320
- "loss": 3.2549,
321
  "step": 26000
322
  },
323
  {
324
  "epoch": 0.66,
325
- "learning_rate": 7.990888627059051e-05,
326
- "loss": 3.3427,
327
  "step": 26500
328
  },
329
  {
330
  "epoch": 0.67,
331
- "learning_rate": 7.990534959645739e-05,
332
- "loss": 3.2713,
333
  "step": 27000
334
  },
335
  {
336
  "epoch": 0.68,
337
- "learning_rate": 7.990174566444893e-05,
338
- "loss": 3.2461,
339
  "step": 27500
340
  },
341
  {
342
- "epoch": 0.69,
343
- "learning_rate": 7.989807448063936e-05,
344
- "loss": 3.2944,
345
  "step": 28000
346
  },
347
  {
348
  "epoch": 0.71,
349
- "learning_rate": 7.989433605121617e-05,
350
- "loss": 3.3129,
351
  "step": 28500
352
  },
353
  {
354
  "epoch": 0.72,
355
- "learning_rate": 7.989053038248027e-05,
356
- "loss": 3.268,
357
  "step": 29000
358
  },
359
  {
360
  "epoch": 0.73,
361
- "learning_rate": 7.988665748084586e-05,
362
- "loss": 3.3362,
363
  "step": 29500
364
  },
365
  {
366
- "epoch": 0.74,
367
- "learning_rate": 7.988271735284046e-05,
368
- "loss": 3.2817,
369
  "step": 30000
370
  },
371
  {
372
  "epoch": 0.76,
373
- "learning_rate": 7.987871000510492e-05,
374
- "loss": 3.359,
375
  "step": 30500
376
  },
377
  {
378
  "epoch": 0.77,
379
- "learning_rate": 7.987463544439334e-05,
380
- "loss": 3.2771,
381
  "step": 31000
382
  },
383
  {
384
  "epoch": 0.78,
385
- "learning_rate": 7.987049367757314e-05,
386
- "loss": 3.3784,
387
  "step": 31500
388
  },
389
  {
390
- "epoch": 0.79,
391
- "learning_rate": 7.9866284711625e-05,
392
- "loss": 3.3053,
393
  "step": 32000
394
  },
395
  {
396
  "epoch": 0.81,
397
- "learning_rate": 7.986200855364284e-05,
398
- "loss": 3.3062,
399
  "step": 32500
400
  },
401
  {
402
  "epoch": 0.82,
403
- "learning_rate": 7.985766521083388e-05,
404
- "loss": 3.2485,
405
  "step": 33000
406
  },
407
  {
408
  "epoch": 0.83,
409
- "learning_rate": 7.985325469051851e-05,
410
- "loss": 3.2577,
411
  "step": 33500
412
  },
413
  {
414
- "epoch": 0.84,
415
- "learning_rate": 7.98487770001304e-05,
416
- "loss": 3.3197,
417
  "step": 34000
418
  },
419
  {
420
  "epoch": 0.86,
421
- "learning_rate": 7.984423214721638e-05,
422
- "loss": 3.206,
423
  "step": 34500
424
  },
425
  {
426
  "epoch": 0.87,
427
- "learning_rate": 7.983962013943654e-05,
428
- "loss": 3.3556,
429
  "step": 35000
430
  },
431
  {
432
  "epoch": 0.88,
433
- "learning_rate": 7.983494098456408e-05,
434
- "loss": 3.3232,
435
  "step": 35500
436
  },
437
  {
438
- "epoch": 0.89,
439
- "learning_rate": 7.983019469048544e-05,
440
- "loss": 3.2438,
441
  "step": 36000
442
  },
443
  {
444
- "epoch": 0.9,
445
- "learning_rate": 7.982538126520019e-05,
446
- "loss": 3.3,
447
  "step": 36500
448
  },
449
  {
450
  "epoch": 0.92,
451
- "learning_rate": 7.982050071682102e-05,
452
- "loss": 3.3622,
453
  "step": 37000
454
  },
455
  {
456
  "epoch": 0.93,
457
- "learning_rate": 7.981555305357381e-05,
458
- "loss": 3.2414,
459
  "step": 37500
460
  },
461
  {
462
- "epoch": 0.94,
463
- "learning_rate": 7.981053828379751e-05,
464
- "loss": 3.3854,
465
  "step": 38000
466
  },
467
  {
468
- "epoch": 0.95,
469
- "learning_rate": 7.980545641594418e-05,
470
- "loss": 3.2529,
471
  "step": 38500
472
  },
473
  {
474
  "epoch": 0.97,
475
- "learning_rate": 7.9800307458579e-05,
476
- "loss": 3.3274,
477
  "step": 39000
478
  },
479
  {
480
  "epoch": 0.98,
481
- "learning_rate": 7.979509142038018e-05,
482
- "loss": 3.2614,
483
  "step": 39500
484
  },
485
- {
486
- "epoch": 0.99,
487
- "learning_rate": 7.978980831013903e-05,
488
- "loss": 3.3069,
489
- "step": 40000
490
- },
491
  {
492
  "epoch": 1.0,
493
- "eval_bleu": 1.0,
494
- "eval_brevity_penalty": 1.0,
495
- "eval_length_ratio": 1.0,
496
- "eval_loss": 3.12583589553833,
497
- "eval_precisions": [
498
- 1.0,
499
- 1.0,
500
- 1.0,
501
- 1.0
502
- ],
503
- "eval_reference_length": 4591104,
504
- "eval_runtime": 8030.2897,
505
- "eval_samples_per_second": 1.117,
506
- "eval_steps_per_second": 0.558,
507
- "eval_translation_length": 4591104,
508
- "step": 40348
509
  },
510
  {
511
  "epoch": 1.0,
512
- "learning_rate": 7.978445813675987e-05,
513
- "loss": 3.2673,
514
- "step": 40500
515
- },
516
- {
517
- "epoch": 1.02,
518
- "learning_rate": 7.977904090926009e-05,
519
- "loss": 3.3218,
520
- "step": 41000
521
- },
522
- {
523
- "epoch": 1.03,
524
- "learning_rate": 7.977355663677008e-05,
525
- "loss": 3.1583,
526
- "step": 41500
527
- },
528
- {
529
- "epoch": 1.04,
530
- "learning_rate": 7.97680053285332e-05,
531
- "loss": 3.2518,
532
- "step": 42000
533
- },
534
- {
535
- "epoch": 1.05,
536
- "learning_rate": 7.976238699390583e-05,
537
- "loss": 3.2414,
538
- "step": 42500
539
- },
540
- {
541
- "epoch": 1.07,
542
- "learning_rate": 7.975670164235732e-05,
543
- "loss": 3.2775,
544
- "step": 43000
545
- },
546
- {
547
- "epoch": 1.08,
548
- "learning_rate": 7.975094928346996e-05,
549
- "loss": 3.1861,
550
- "step": 43500
551
- },
552
- {
553
- "epoch": 1.09,
554
- "learning_rate": 7.974512992693896e-05,
555
- "loss": 3.1894,
556
- "step": 44000
557
- },
558
- {
559
- "epoch": 1.1,
560
- "learning_rate": 7.973924358257246e-05,
561
- "loss": 3.2074,
562
- "step": 44500
563
- },
564
- {
565
- "epoch": 1.12,
566
- "learning_rate": 7.973329026029155e-05,
567
- "loss": 3.2695,
568
- "step": 45000
569
- },
570
- {
571
- "epoch": 1.13,
572
- "learning_rate": 7.972726997013014e-05,
573
- "loss": 3.3168,
574
- "step": 45500
575
- },
576
- {
577
- "epoch": 1.14,
578
- "learning_rate": 7.972118272223505e-05,
579
- "loss": 3.2341,
580
- "step": 46000
581
- },
582
- {
583
- "epoch": 1.15,
584
- "learning_rate": 7.971502852686593e-05,
585
- "loss": 3.2581,
586
- "step": 46500
587
- },
588
- {
589
- "epoch": 1.16,
590
- "learning_rate": 7.970880739439528e-05,
591
- "loss": 3.2041,
592
- "step": 47000
593
- },
594
- {
595
- "epoch": 1.18,
596
- "learning_rate": 7.970251933530842e-05,
597
- "loss": 3.2144,
598
- "step": 47500
599
- },
600
- {
601
- "epoch": 1.19,
602
- "learning_rate": 7.969616436020346e-05,
603
- "loss": 3.2718,
604
- "step": 48000
605
- },
606
- {
607
- "epoch": 1.2,
608
- "learning_rate": 7.96897424797913e-05,
609
- "loss": 3.186,
610
- "step": 48500
611
- },
612
- {
613
- "epoch": 1.21,
614
- "learning_rate": 7.968325370489561e-05,
615
- "loss": 3.1968,
616
- "step": 49000
617
- },
618
- {
619
- "epoch": 1.23,
620
- "learning_rate": 7.967669804645278e-05,
621
- "loss": 3.2777,
622
- "step": 49500
623
- },
624
- {
625
- "epoch": 1.24,
626
- "learning_rate": 7.967007551551196e-05,
627
- "loss": 3.2775,
628
- "step": 50000
629
- },
630
- {
631
- "epoch": 1.25,
632
- "learning_rate": 7.9663386123235e-05,
633
- "loss": 3.2174,
634
- "step": 50500
635
- },
636
- {
637
- "epoch": 1.26,
638
- "learning_rate": 7.965662988089642e-05,
639
- "loss": 3.2144,
640
- "step": 51000
641
- },
642
- {
643
- "epoch": 1.28,
644
- "learning_rate": 7.964980679988345e-05,
645
- "loss": 3.3125,
646
- "step": 51500
647
- },
648
- {
649
- "epoch": 1.29,
650
- "learning_rate": 7.964291689169593e-05,
651
- "loss": 3.1662,
652
- "step": 52000
653
- },
654
- {
655
- "epoch": 1.3,
656
- "learning_rate": 7.963596016794635e-05,
657
- "loss": 3.223,
658
- "step": 52500
659
- },
660
- {
661
- "epoch": 1.31,
662
- "learning_rate": 7.962893664035984e-05,
663
- "loss": 3.1838,
664
- "step": 53000
665
- },
666
- {
667
- "epoch": 1.33,
668
- "learning_rate": 7.962184632077407e-05,
669
- "loss": 3.3471,
670
- "step": 53500
671
- },
672
- {
673
- "epoch": 1.34,
674
- "learning_rate": 7.961468922113935e-05,
675
- "loss": 3.1718,
676
- "step": 54000
677
- },
678
- {
679
- "epoch": 1.35,
680
- "learning_rate": 7.960746535351847e-05,
681
- "loss": 3.2135,
682
- "step": 54500
683
- },
684
- {
685
- "epoch": 1.36,
686
- "learning_rate": 7.96001747300868e-05,
687
- "loss": 3.1969,
688
- "step": 55000
689
- },
690
- {
691
- "epoch": 1.38,
692
- "learning_rate": 7.959281736313223e-05,
693
- "loss": 3.2402,
694
- "step": 55500
695
- },
696
- {
697
- "epoch": 1.39,
698
- "learning_rate": 7.958539326505511e-05,
699
- "loss": 3.2797,
700
- "step": 56000
701
- },
702
- {
703
- "epoch": 1.4,
704
- "learning_rate": 7.957790244836829e-05,
705
- "loss": 3.2674,
706
- "step": 56500
707
- },
708
- {
709
- "epoch": 1.41,
710
- "learning_rate": 7.957034492569703e-05,
711
- "loss": 3.1757,
712
- "step": 57000
713
- },
714
- {
715
- "epoch": 1.43,
716
- "learning_rate": 7.956272070977906e-05,
717
- "loss": 3.2459,
718
- "step": 57500
719
- },
720
- {
721
- "epoch": 1.44,
722
- "learning_rate": 7.955502981346449e-05,
723
- "loss": 3.2732,
724
- "step": 58000
725
- },
726
- {
727
- "epoch": 1.45,
728
- "learning_rate": 7.954727224971583e-05,
729
- "loss": 3.1479,
730
- "step": 58500
731
- },
732
- {
733
- "epoch": 1.46,
734
- "learning_rate": 7.953944803160796e-05,
735
- "loss": 3.2534,
736
- "step": 59000
737
- },
738
- {
739
- "epoch": 1.47,
740
- "learning_rate": 7.953155717232809e-05,
741
- "loss": 3.26,
742
- "step": 59500
743
- },
744
- {
745
- "epoch": 1.49,
746
- "learning_rate": 7.952359968517573e-05,
747
- "loss": 3.1955,
748
- "step": 60000
749
- },
750
- {
751
- "epoch": 1.5,
752
- "learning_rate": 7.951557558356275e-05,
753
- "loss": 3.1423,
754
- "step": 60500
755
- },
756
- {
757
- "epoch": 1.51,
758
- "learning_rate": 7.95074848810132e-05,
759
- "loss": 3.2203,
760
- "step": 61000
761
- },
762
- {
763
- "epoch": 1.52,
764
- "learning_rate": 7.949932759116346e-05,
765
- "loss": 3.2274,
766
- "step": 61500
767
- },
768
- {
769
- "epoch": 1.54,
770
- "learning_rate": 7.949110372776213e-05,
771
- "loss": 3.2456,
772
- "step": 62000
773
- },
774
- {
775
- "epoch": 1.55,
776
- "learning_rate": 7.948281330466996e-05,
777
- "loss": 3.2285,
778
- "step": 62500
779
- },
780
- {
781
- "epoch": 1.56,
782
- "learning_rate": 7.947445633585995e-05,
783
- "loss": 3.1965,
784
- "step": 63000
785
- },
786
- {
787
- "epoch": 1.57,
788
- "learning_rate": 7.946603283541722e-05,
789
- "loss": 3.2021,
790
- "step": 63500
791
- },
792
- {
793
- "epoch": 1.59,
794
- "learning_rate": 7.945754281753901e-05,
795
- "loss": 3.1108,
796
- "step": 64000
797
- },
798
- {
799
- "epoch": 1.6,
800
- "learning_rate": 7.944898629653475e-05,
801
- "loss": 3.1778,
802
- "step": 64500
803
- },
804
- {
805
- "epoch": 1.61,
806
- "learning_rate": 7.944036328682583e-05,
807
- "loss": 3.2267,
808
- "step": 65000
809
- },
810
- {
811
- "epoch": 1.62,
812
- "learning_rate": 7.943167380294583e-05,
813
- "loss": 3.1233,
814
- "step": 65500
815
- },
816
- {
817
- "epoch": 1.64,
818
- "learning_rate": 7.942291785954028e-05,
819
- "loss": 3.3035,
820
- "step": 66000
821
- },
822
- {
823
- "epoch": 1.65,
824
- "learning_rate": 7.941409547136677e-05,
825
- "loss": 3.1858,
826
- "step": 66500
827
- },
828
- {
829
- "epoch": 1.66,
830
- "learning_rate": 7.940520665329483e-05,
831
- "loss": 3.122,
832
- "step": 67000
833
- },
834
- {
835
- "epoch": 1.67,
836
- "learning_rate": 7.939625142030604e-05,
837
- "loss": 3.2538,
838
- "step": 67500
839
- },
840
- {
841
- "epoch": 1.69,
842
- "learning_rate": 7.93872297874938e-05,
843
- "loss": 3.1382,
844
- "step": 68000
845
- },
846
- {
847
- "epoch": 1.7,
848
- "learning_rate": 7.937814177006355e-05,
849
- "loss": 3.169,
850
- "step": 68500
851
- },
852
- {
853
- "epoch": 1.71,
854
- "learning_rate": 7.936898738333253e-05,
855
- "loss": 3.0855,
856
- "step": 69000
857
- },
858
- {
859
- "epoch": 1.72,
860
- "learning_rate": 7.935976664272985e-05,
861
- "loss": 3.2122,
862
- "step": 69500
863
- },
864
- {
865
- "epoch": 1.73,
866
- "learning_rate": 7.935047956379646e-05,
867
- "loss": 3.1769,
868
- "step": 70000
869
- },
870
- {
871
- "epoch": 1.75,
872
- "learning_rate": 7.934112616218515e-05,
873
- "loss": 3.2102,
874
- "step": 70500
875
- },
876
- {
877
- "epoch": 1.76,
878
- "learning_rate": 7.933170645366047e-05,
879
- "loss": 3.2092,
880
- "step": 71000
881
- },
882
- {
883
- "epoch": 1.77,
884
- "learning_rate": 7.932222045409874e-05,
885
- "loss": 3.195,
886
- "step": 71500
887
- },
888
- {
889
- "epoch": 1.78,
890
- "learning_rate": 7.931266817948795e-05,
891
- "loss": 3.1975,
892
- "step": 72000
893
- },
894
- {
895
- "epoch": 1.8,
896
- "learning_rate": 7.930304964592788e-05,
897
- "loss": 3.3457,
898
- "step": 72500
899
- },
900
- {
901
- "epoch": 1.81,
902
- "learning_rate": 7.929336486962994e-05,
903
- "loss": 3.0512,
904
- "step": 73000
905
- },
906
- {
907
- "epoch": 1.82,
908
- "learning_rate": 7.928361386691718e-05,
909
- "loss": 3.2071,
910
- "step": 73500
911
- },
912
- {
913
- "epoch": 1.83,
914
- "learning_rate": 7.927379665422429e-05,
915
- "loss": 3.2991,
916
- "step": 74000
917
- },
918
- {
919
- "epoch": 1.85,
920
- "learning_rate": 7.926391324809754e-05,
921
- "loss": 3.1947,
922
- "step": 74500
923
- },
924
- {
925
- "epoch": 1.86,
926
- "learning_rate": 7.925396366519476e-05,
927
- "loss": 3.2052,
928
- "step": 75000
929
- },
930
- {
931
- "epoch": 1.87,
932
- "learning_rate": 7.924394792228533e-05,
933
- "loss": 3.1196,
934
- "step": 75500
935
- },
936
- {
937
- "epoch": 1.88,
938
- "learning_rate": 7.923386603625016e-05,
939
- "loss": 3.1718,
940
- "step": 76000
941
- },
942
- {
943
- "epoch": 1.9,
944
- "learning_rate": 7.922371802408158e-05,
945
- "loss": 3.1563,
946
- "step": 76500
947
- },
948
- {
949
- "epoch": 1.91,
950
- "learning_rate": 7.921350390288342e-05,
951
- "loss": 3.2303,
952
- "step": 77000
953
- },
954
- {
955
- "epoch": 1.92,
956
- "learning_rate": 7.920322368987093e-05,
957
- "loss": 3.1139,
958
- "step": 77500
959
- },
960
- {
961
- "epoch": 1.93,
962
- "learning_rate": 7.919287740237071e-05,
963
- "loss": 3.207,
964
- "step": 78000
965
- },
966
- {
967
- "epoch": 1.95,
968
- "learning_rate": 7.918246505782079e-05,
969
- "loss": 3.1756,
970
- "step": 78500
971
- },
972
- {
973
- "epoch": 1.96,
974
- "learning_rate": 7.917198667377047e-05,
975
- "loss": 3.0951,
976
- "step": 79000
977
- },
978
- {
979
- "epoch": 1.97,
980
- "learning_rate": 7.916144226788041e-05,
981
- "loss": 3.2165,
982
- "step": 79500
983
- },
984
- {
985
- "epoch": 1.98,
986
- "learning_rate": 7.915083185792251e-05,
987
- "loss": 3.1236,
988
- "step": 80000
989
- },
990
- {
991
- "epoch": 2.0,
992
- "learning_rate": 7.914015546177993e-05,
993
- "loss": 3.1342,
994
- "step": 80500
995
- },
996
- {
997
- "epoch": 2.0,
998
- "eval_bleu": 1.0,
999
- "eval_brevity_penalty": 1.0,
1000
- "eval_length_ratio": 1.0,
1001
- "eval_loss": 3.0691702365875244,
1002
- "eval_precisions": [
1003
- 1.0,
1004
- 1.0,
1005
- 1.0,
1006
- 1.0
1007
- ],
1008
- "eval_reference_length": 4591104,
1009
- "eval_runtime": 8264.3842,
1010
- "eval_samples_per_second": 1.085,
1011
- "eval_steps_per_second": 0.543,
1012
- "eval_translation_length": 4591104,
1013
- "step": 80696
1014
- },
1015
- {
1016
- "epoch": 2.01,
1017
- "learning_rate": 7.912941309744704e-05,
1018
- "loss": 3.1234,
1019
- "step": 81000
1020
- },
1021
- {
1022
- "epoch": 2.02,
1023
- "learning_rate": 7.91186047830294e-05,
1024
- "loss": 3.1118,
1025
- "step": 81500
1026
- },
1027
- {
1028
- "epoch": 2.03,
1029
- "learning_rate": 7.910773053674371e-05,
1030
- "loss": 3.1879,
1031
- "step": 82000
1032
- },
1033
- {
1034
- "epoch": 2.04,
1035
- "learning_rate": 7.909679037691783e-05,
1036
- "loss": 3.2032,
1037
- "step": 82500
1038
- },
1039
- {
1040
- "epoch": 2.06,
1041
- "learning_rate": 7.908578432199069e-05,
1042
- "loss": 3.1009,
1043
- "step": 83000
1044
- },
1045
- {
1046
- "epoch": 2.07,
1047
- "learning_rate": 7.907471239051224e-05,
1048
- "loss": 3.0688,
1049
- "step": 83500
1050
- },
1051
- {
1052
- "epoch": 2.08,
1053
- "learning_rate": 7.906357460114355e-05,
1054
- "loss": 3.0835,
1055
- "step": 84000
1056
- },
1057
- {
1058
- "epoch": 2.09,
1059
- "learning_rate": 7.90523709726566e-05,
1060
- "loss": 3.1157,
1061
- "step": 84500
1062
- },
1063
- {
1064
- "epoch": 2.11,
1065
- "learning_rate": 7.90411015239344e-05,
1066
- "loss": 3.0581,
1067
- "step": 85000
1068
- },
1069
- {
1070
- "epoch": 2.12,
1071
- "learning_rate": 7.902976627397088e-05,
1072
- "loss": 3.206,
1073
- "step": 85500
1074
- },
1075
- {
1076
- "epoch": 2.13,
1077
- "learning_rate": 7.901836524187085e-05,
1078
- "loss": 3.2185,
1079
- "step": 86000
1080
- },
1081
- {
1082
- "epoch": 2.14,
1083
- "learning_rate": 7.900689844685002e-05,
1084
- "loss": 3.1204,
1085
- "step": 86500
1086
- },
1087
- {
1088
- "epoch": 2.16,
1089
- "learning_rate": 7.899536590823493e-05,
1090
- "loss": 3.2066,
1091
- "step": 87000
1092
- },
1093
- {
1094
- "epoch": 2.17,
1095
- "learning_rate": 7.898376764546293e-05,
1096
- "loss": 3.1684,
1097
- "step": 87500
1098
- },
1099
- {
1100
- "epoch": 2.18,
1101
- "learning_rate": 7.89721036780821e-05,
1102
- "loss": 3.1742,
1103
- "step": 88000
1104
- },
1105
- {
1106
- "epoch": 2.19,
1107
- "learning_rate": 7.896037402575136e-05,
1108
- "loss": 3.1598,
1109
- "step": 88500
1110
- },
1111
- {
1112
- "epoch": 2.21,
1113
- "learning_rate": 7.894857870824023e-05,
1114
- "loss": 3.0966,
1115
- "step": 89000
1116
- },
1117
- {
1118
- "epoch": 2.22,
1119
- "learning_rate": 7.893671774542899e-05,
1120
- "loss": 3.2035,
1121
- "step": 89500
1122
- },
1123
- {
1124
- "epoch": 2.23,
1125
- "learning_rate": 7.892479115730848e-05,
1126
- "loss": 3.215,
1127
- "step": 90000
1128
- },
1129
- {
1130
- "epoch": 2.24,
1131
- "learning_rate": 7.891279896398023e-05,
1132
- "loss": 3.153,
1133
- "step": 90500
1134
- },
1135
- {
1136
- "epoch": 2.26,
1137
- "learning_rate": 7.890074118565626e-05,
1138
- "loss": 3.1019,
1139
- "step": 91000
1140
- },
1141
- {
1142
- "epoch": 2.27,
1143
- "learning_rate": 7.88886178426592e-05,
1144
- "loss": 3.1509,
1145
- "step": 91500
1146
- },
1147
- {
1148
- "epoch": 2.28,
1149
- "learning_rate": 7.887642895542218e-05,
1150
- "loss": 3.1699,
1151
- "step": 92000
1152
- },
1153
- {
1154
- "epoch": 2.29,
1155
- "learning_rate": 7.886417454448872e-05,
1156
- "loss": 3.1162,
1157
- "step": 92500
1158
- },
1159
- {
1160
- "epoch": 2.3,
1161
- "learning_rate": 7.885185463051289e-05,
1162
- "loss": 3.1782,
1163
- "step": 93000
1164
- },
1165
- {
1166
- "epoch": 2.32,
1167
- "learning_rate": 7.883946923425907e-05,
1168
- "loss": 3.1546,
1169
- "step": 93500
1170
- },
1171
- {
1172
- "epoch": 2.33,
1173
- "learning_rate": 7.882701837660205e-05,
1174
- "loss": 3.1551,
1175
- "step": 94000
1176
- },
1177
- {
1178
- "epoch": 2.34,
1179
- "learning_rate": 7.881450207852696e-05,
1180
- "loss": 3.1327,
1181
- "step": 94500
1182
- },
1183
- {
1184
- "epoch": 2.35,
1185
- "learning_rate": 7.880192036112917e-05,
1186
- "loss": 3.0316,
1187
- "step": 95000
1188
- },
1189
- {
1190
- "epoch": 2.37,
1191
- "learning_rate": 7.878927324561437e-05,
1192
- "loss": 3.1897,
1193
- "step": 95500
1194
- },
1195
- {
1196
- "epoch": 2.38,
1197
- "learning_rate": 7.877656075329846e-05,
1198
- "loss": 3.1447,
1199
- "step": 96000
1200
- },
1201
- {
1202
- "epoch": 2.39,
1203
- "learning_rate": 7.876378290560751e-05,
1204
- "loss": 3.1764,
1205
- "step": 96500
1206
- },
1207
- {
1208
- "epoch": 2.4,
1209
- "learning_rate": 7.875093972407774e-05,
1210
- "loss": 3.1213,
1211
- "step": 97000
1212
- },
1213
- {
1214
- "epoch": 2.42,
1215
- "learning_rate": 7.873803123035553e-05,
1216
- "loss": 3.0774,
1217
- "step": 97500
1218
- },
1219
- {
1220
- "epoch": 2.43,
1221
- "learning_rate": 7.872505744619728e-05,
1222
- "loss": 3.1127,
1223
- "step": 98000
1224
- },
1225
- {
1226
- "epoch": 2.44,
1227
- "learning_rate": 7.871201839346947e-05,
1228
- "loss": 3.2621,
1229
- "step": 98500
1230
- },
1231
- {
1232
- "epoch": 2.45,
1233
- "learning_rate": 7.869891409414858e-05,
1234
- "loss": 3.2113,
1235
- "step": 99000
1236
- },
1237
- {
1238
- "epoch": 2.47,
1239
- "learning_rate": 7.868574457032106e-05,
1240
- "loss": 3.1218,
1241
- "step": 99500
1242
- },
1243
- {
1244
- "epoch": 2.48,
1245
- "learning_rate": 7.867250984418328e-05,
1246
- "loss": 3.1409,
1247
- "step": 100000
1248
- },
1249
- {
1250
- "epoch": 2.49,
1251
- "learning_rate": 7.86592099380415e-05,
1252
- "loss": 3.073,
1253
- "step": 100500
1254
- },
1255
- {
1256
- "epoch": 2.5,
1257
- "learning_rate": 7.864584487431186e-05,
1258
- "loss": 3.1624,
1259
- "step": 101000
1260
- },
1261
- {
1262
- "epoch": 2.52,
1263
- "learning_rate": 7.863241467552032e-05,
1264
- "loss": 3.2052,
1265
- "step": 101500
1266
- },
1267
- {
1268
- "epoch": 2.53,
1269
- "learning_rate": 7.861891936430258e-05,
1270
- "loss": 3.1714,
1271
- "step": 102000
1272
- },
1273
- {
1274
- "epoch": 2.54,
1275
- "learning_rate": 7.860535896340414e-05,
1276
- "loss": 3.1728,
1277
- "step": 102500
1278
- },
1279
- {
1280
- "epoch": 2.55,
1281
- "learning_rate": 7.859173349568015e-05,
1282
- "loss": 3.1564,
1283
- "step": 103000
1284
- },
1285
- {
1286
- "epoch": 2.57,
1287
- "learning_rate": 7.857804298409547e-05,
1288
- "loss": 3.0868,
1289
- "step": 103500
1290
- },
1291
- {
1292
- "epoch": 2.58,
1293
- "learning_rate": 7.856428745172455e-05,
1294
- "loss": 3.1485,
1295
- "step": 104000
1296
- },
1297
- {
1298
- "epoch": 2.59,
1299
- "learning_rate": 7.855046692175145e-05,
1300
- "loss": 3.2001,
1301
- "step": 104500
1302
- },
1303
- {
1304
- "epoch": 2.6,
1305
- "learning_rate": 7.853658141746979e-05,
1306
- "loss": 3.0651,
1307
- "step": 105000
1308
- },
1309
- {
1310
- "epoch": 2.61,
1311
- "learning_rate": 7.852263096228267e-05,
1312
- "loss": 3.1709,
1313
- "step": 105500
1314
- },
1315
- {
1316
- "epoch": 2.63,
1317
- "learning_rate": 7.850861557970269e-05,
1318
- "loss": 3.1635,
1319
- "step": 106000
1320
- },
1321
- {
1322
- "epoch": 2.64,
1323
- "learning_rate": 7.849453529335188e-05,
1324
- "loss": 3.1123,
1325
- "step": 106500
1326
- },
1327
- {
1328
- "epoch": 2.65,
1329
- "learning_rate": 7.848039012696161e-05,
1330
- "loss": 3.1162,
1331
- "step": 107000
1332
- },
1333
- {
1334
- "epoch": 2.66,
1335
- "learning_rate": 7.846618010437265e-05,
1336
- "loss": 3.1275,
1337
- "step": 107500
1338
- },
1339
- {
1340
- "epoch": 2.68,
1341
- "learning_rate": 7.84519052495351e-05,
1342
- "loss": 3.1035,
1343
- "step": 108000
1344
- },
1345
- {
1346
- "epoch": 2.69,
1347
- "learning_rate": 7.843756558650827e-05,
1348
- "loss": 3.0308,
1349
- "step": 108500
1350
- },
1351
- {
1352
- "epoch": 2.7,
1353
- "learning_rate": 7.842316113946073e-05,
1354
- "loss": 3.1311,
1355
- "step": 109000
1356
- },
1357
- {
1358
- "epoch": 2.71,
1359
- "learning_rate": 7.840869193267027e-05,
1360
- "loss": 3.1477,
1361
- "step": 109500
1362
- },
1363
- {
1364
- "epoch": 2.73,
1365
- "learning_rate": 7.839415799052378e-05,
1366
- "loss": 3.1221,
1367
- "step": 110000
1368
- },
1369
- {
1370
- "epoch": 2.74,
1371
- "learning_rate": 7.837955933751725e-05,
1372
- "loss": 3.1322,
1373
- "step": 110500
1374
- },
1375
- {
1376
- "epoch": 2.75,
1377
- "learning_rate": 7.83648959982558e-05,
1378
- "loss": 3.1488,
1379
- "step": 111000
1380
- },
1381
- {
1382
- "epoch": 2.76,
1383
- "learning_rate": 7.835016799745353e-05,
1384
- "loss": 3.0619,
1385
- "step": 111500
1386
- },
1387
- {
1388
- "epoch": 2.78,
1389
- "learning_rate": 7.833537535993351e-05,
1390
- "loss": 3.237,
1391
- "step": 112000
1392
- },
1393
- {
1394
- "epoch": 2.79,
1395
- "learning_rate": 7.83205181106278e-05,
1396
- "loss": 3.1022,
1397
- "step": 112500
1398
- },
1399
- {
1400
- "epoch": 2.8,
1401
- "learning_rate": 7.83055962745773e-05,
1402
- "loss": 3.066,
1403
- "step": 113000
1404
- },
1405
- {
1406
- "epoch": 2.81,
1407
- "learning_rate": 7.829060987693179e-05,
1408
- "loss": 3.1903,
1409
- "step": 113500
1410
- },
1411
- {
1412
- "epoch": 2.83,
1413
- "learning_rate": 7.827555894294991e-05,
1414
- "loss": 3.2208,
1415
- "step": 114000
1416
- },
1417
- {
1418
- "epoch": 2.84,
1419
- "learning_rate": 7.8260443497999e-05,
1420
- "loss": 3.2005,
1421
- "step": 114500
1422
- },
1423
- {
1424
- "epoch": 2.85,
1425
- "learning_rate": 7.824526356755516e-05,
1426
- "loss": 3.184,
1427
- "step": 115000
1428
- },
1429
- {
1430
- "epoch": 2.86,
1431
- "learning_rate": 7.82300191772032e-05,
1432
- "loss": 3.1185,
1433
- "step": 115500
1434
- },
1435
- {
1436
- "epoch": 2.87,
1437
- "learning_rate": 7.821471035263653e-05,
1438
- "loss": 3.1077,
1439
- "step": 116000
1440
- },
1441
- {
1442
- "epoch": 2.89,
1443
- "learning_rate": 7.819933711965718e-05,
1444
- "loss": 3.1901,
1445
- "step": 116500
1446
- },
1447
- {
1448
- "epoch": 2.9,
1449
- "learning_rate": 7.818389950417574e-05,
1450
- "loss": 3.1149,
1451
- "step": 117000
1452
- },
1453
- {
1454
- "epoch": 2.91,
1455
- "learning_rate": 7.816839753221132e-05,
1456
- "loss": 3.1393,
1457
- "step": 117500
1458
- },
1459
- {
1460
- "epoch": 2.92,
1461
- "learning_rate": 7.815283122989147e-05,
1462
- "loss": 3.0862,
1463
- "step": 118000
1464
- },
1465
- {
1466
- "epoch": 2.94,
1467
- "learning_rate": 7.813720062345219e-05,
1468
- "loss": 3.072,
1469
- "step": 118500
1470
- },
1471
- {
1472
- "epoch": 2.95,
1473
- "learning_rate": 7.812150573923785e-05,
1474
- "loss": 3.1103,
1475
- "step": 119000
1476
- },
1477
- {
1478
- "epoch": 2.96,
1479
- "learning_rate": 7.810574660370116e-05,
1480
- "loss": 3.2114,
1481
- "step": 119500
1482
- },
1483
- {
1484
- "epoch": 2.97,
1485
- "learning_rate": 7.808992324340312e-05,
1486
- "loss": 3.1425,
1487
- "step": 120000
1488
- },
1489
- {
1490
- "epoch": 2.99,
1491
- "learning_rate": 7.807403568501297e-05,
1492
- "loss": 3.1051,
1493
- "step": 120500
1494
- },
1495
- {
1496
- "epoch": 3.0,
1497
- "learning_rate": 7.805808395530816e-05,
1498
- "loss": 3.1355,
1499
- "step": 121000
1500
- },
1501
- {
1502
- "epoch": 3.0,
1503
- "eval_bleu": 1.0,
1504
- "eval_brevity_penalty": 1.0,
1505
- "eval_length_ratio": 1.0,
1506
- "eval_loss": 3.0360162258148193,
1507
- "eval_precisions": [
1508
- 1.0,
1509
- 1.0,
1510
- 1.0,
1511
- 1.0
1512
- ],
1513
- "eval_reference_length": 4591104,
1514
- "eval_runtime": 8117.2024,
1515
- "eval_samples_per_second": 1.105,
1516
- "eval_steps_per_second": 0.552,
1517
- "eval_translation_length": 4591104,
1518
- "step": 121044
1519
- },
1520
- {
1521
- "epoch": 3.01,
1522
- "learning_rate": 7.80420680811743e-05,
1523
- "loss": 3.0822,
1524
- "step": 121500
1525
- },
1526
- {
1527
- "epoch": 3.02,
1528
- "learning_rate": 7.80259880896051e-05,
1529
- "loss": 3.0585,
1530
- "step": 122000
1531
- },
1532
- {
1533
- "epoch": 3.04,
1534
- "learning_rate": 7.800984400770236e-05,
1535
- "loss": 3.1003,
1536
- "step": 122500
1537
- },
1538
- {
1539
- "epoch": 3.05,
1540
- "learning_rate": 7.799363586267584e-05,
1541
- "loss": 2.9734,
1542
- "step": 123000
1543
- },
1544
- {
1545
- "epoch": 3.06,
1546
- "learning_rate": 7.797736368184335e-05,
1547
- "loss": 3.1722,
1548
- "step": 123500
1549
- },
1550
- {
1551
- "epoch": 3.07,
1552
- "learning_rate": 7.79610274926306e-05,
1553
- "loss": 3.1007,
1554
- "step": 124000
1555
- },
1556
- {
1557
- "epoch": 3.09,
1558
- "learning_rate": 7.794462732257115e-05,
1559
- "loss": 3.0796,
1560
- "step": 124500
1561
- },
1562
- {
1563
- "epoch": 3.1,
1564
- "learning_rate": 7.792816319930645e-05,
1565
- "loss": 3.0691,
1566
- "step": 125000
1567
- },
1568
- {
1569
- "epoch": 3.11,
1570
- "learning_rate": 7.791163515058568e-05,
1571
- "loss": 3.1111,
1572
- "step": 125500
1573
- },
1574
- {
1575
- "epoch": 3.12,
1576
- "learning_rate": 7.78950432042658e-05,
1577
- "loss": 3.0355,
1578
- "step": 126000
1579
- },
1580
- {
1581
- "epoch": 3.14,
1582
- "learning_rate": 7.787838738831148e-05,
1583
- "loss": 3.0751,
1584
- "step": 126500
1585
- },
1586
- {
1587
- "epoch": 3.15,
1588
- "learning_rate": 7.786166773079499e-05,
1589
- "loss": 3.1197,
1590
- "step": 127000
1591
- },
1592
- {
1593
- "epoch": 3.16,
1594
- "learning_rate": 7.784488425989624e-05,
1595
- "loss": 3.0997,
1596
- "step": 127500
1597
- },
1598
- {
1599
- "epoch": 3.17,
1600
- "learning_rate": 7.782803700390268e-05,
1601
- "loss": 3.2085,
1602
- "step": 128000
1603
- },
1604
- {
1605
- "epoch": 3.18,
1606
- "learning_rate": 7.781112599120928e-05,
1607
- "loss": 3.1391,
1608
- "step": 128500
1609
- },
1610
- {
1611
- "epoch": 3.2,
1612
- "learning_rate": 7.779415125031842e-05,
1613
- "loss": 3.1137,
1614
- "step": 129000
1615
- },
1616
- {
1617
- "epoch": 3.21,
1618
- "learning_rate": 7.777711280983994e-05,
1619
- "loss": 3.1373,
1620
- "step": 129500
1621
- },
1622
- {
1623
- "epoch": 3.22,
1624
- "learning_rate": 7.776001069849104e-05,
1625
- "loss": 3.1228,
1626
- "step": 130000
1627
- },
1628
- {
1629
- "epoch": 3.23,
1630
- "learning_rate": 7.774284494509619e-05,
1631
- "loss": 3.0356,
1632
- "step": 130500
1633
- },
1634
- {
1635
- "epoch": 3.25,
1636
- "learning_rate": 7.772561557858717e-05,
1637
- "loss": 3.074,
1638
- "step": 131000
1639
- },
1640
- {
1641
- "epoch": 3.26,
1642
- "learning_rate": 7.770832262800298e-05,
1643
- "loss": 3.101,
1644
- "step": 131500
1645
- },
1646
- {
1647
- "epoch": 3.27,
1648
- "learning_rate": 7.769096612248972e-05,
1649
- "loss": 3.0026,
1650
- "step": 132000
1651
- },
1652
- {
1653
- "epoch": 3.28,
1654
- "learning_rate": 7.767354609130067e-05,
1655
- "loss": 2.9642,
1656
- "step": 132500
1657
- },
1658
- {
1659
- "epoch": 3.3,
1660
- "learning_rate": 7.765606256379617e-05,
1661
- "loss": 3.1591,
1662
- "step": 133000
1663
- },
1664
- {
1665
- "epoch": 3.31,
1666
- "learning_rate": 7.763851556944357e-05,
1667
- "loss": 3.1563,
1668
- "step": 133500
1669
- },
1670
- {
1671
- "epoch": 3.32,
1672
- "learning_rate": 7.762090513781717e-05,
1673
- "loss": 3.0032,
1674
- "step": 134000
1675
- },
1676
- {
1677
- "epoch": 3.33,
1678
- "learning_rate": 7.760323129859824e-05,
1679
- "loss": 3.0752,
1680
- "step": 134500
1681
- },
1682
- {
1683
- "epoch": 3.35,
1684
- "learning_rate": 7.758549408157487e-05,
1685
- "loss": 3.1228,
1686
- "step": 135000
1687
- },
1688
- {
1689
- "epoch": 3.36,
1690
- "learning_rate": 7.7567693516642e-05,
1691
- "loss": 3.1929,
1692
- "step": 135500
1693
- },
1694
- {
1695
- "epoch": 3.37,
1696
- "learning_rate": 7.754982963380129e-05,
1697
- "loss": 3.0963,
1698
- "step": 136000
1699
- },
1700
- {
1701
- "epoch": 3.38,
1702
- "learning_rate": 7.75319024631612e-05,
1703
- "loss": 3.1731,
1704
- "step": 136500
1705
- },
1706
- {
1707
- "epoch": 3.4,
1708
- "learning_rate": 7.751391203493678e-05,
1709
- "loss": 3.1822,
1710
- "step": 137000
1711
- },
1712
- {
1713
- "epoch": 3.41,
1714
- "learning_rate": 7.749585837944974e-05,
1715
- "loss": 3.0849,
1716
- "step": 137500
1717
- },
1718
- {
1719
- "epoch": 3.42,
1720
- "learning_rate": 7.747774152712836e-05,
1721
- "loss": 3.1609,
1722
- "step": 138000
1723
- },
1724
- {
1725
- "epoch": 3.43,
1726
- "learning_rate": 7.745956150850738e-05,
1727
- "loss": 3.0784,
1728
- "step": 138500
1729
- },
1730
- {
1731
- "epoch": 3.45,
1732
- "learning_rate": 7.744131835422808e-05,
1733
- "loss": 3.1391,
1734
- "step": 139000
1735
- },
1736
- {
1737
- "epoch": 3.46,
1738
- "learning_rate": 7.74230120950381e-05,
1739
- "loss": 3.1141,
1740
- "step": 139500
1741
- },
1742
- {
1743
- "epoch": 3.47,
1744
- "learning_rate": 7.740464276179143e-05,
1745
- "loss": 3.092,
1746
- "step": 140000
1747
- },
1748
- {
1749
- "epoch": 3.48,
1750
- "learning_rate": 7.738621038544842e-05,
1751
- "loss": 3.0634,
1752
- "step": 140500
1753
- },
1754
- {
1755
- "epoch": 3.49,
1756
- "learning_rate": 7.736771499707562e-05,
1757
- "loss": 3.1131,
1758
- "step": 141000
1759
- },
1760
- {
1761
- "epoch": 3.51,
1762
- "learning_rate": 7.734915662784582e-05,
1763
- "loss": 3.1014,
1764
- "step": 141500
1765
- },
1766
- {
1767
- "epoch": 3.52,
1768
- "learning_rate": 7.733053530903793e-05,
1769
- "loss": 3.1118,
1770
- "step": 142000
1771
- },
1772
- {
1773
- "epoch": 3.53,
1774
- "learning_rate": 7.7311851072037e-05,
1775
- "loss": 3.0762,
1776
- "step": 142500
1777
- },
1778
- {
1779
- "epoch": 3.54,
1780
- "learning_rate": 7.729310394833408e-05,
1781
- "loss": 3.0613,
1782
- "step": 143000
1783
- },
1784
- {
1785
- "epoch": 3.56,
1786
- "learning_rate": 7.727429396952622e-05,
1787
- "loss": 3.1007,
1788
- "step": 143500
1789
- },
1790
- {
1791
- "epoch": 3.57,
1792
- "learning_rate": 7.725542116731643e-05,
1793
- "loss": 3.0766,
1794
- "step": 144000
1795
- },
1796
- {
1797
- "epoch": 3.58,
1798
- "learning_rate": 7.72364855735136e-05,
1799
- "loss": 3.0842,
1800
- "step": 144500
1801
- },
1802
- {
1803
- "epoch": 3.59,
1804
- "learning_rate": 7.721748722003242e-05,
1805
- "loss": 3.1643,
1806
- "step": 145000
1807
- },
1808
- {
1809
- "epoch": 3.61,
1810
- "learning_rate": 7.719842613889342e-05,
1811
- "loss": 3.0702,
1812
- "step": 145500
1813
- },
1814
- {
1815
- "epoch": 3.62,
1816
- "learning_rate": 7.717930236222277e-05,
1817
- "loss": 3.2394,
1818
- "step": 146000
1819
- },
1820
- {
1821
- "epoch": 3.63,
1822
- "learning_rate": 7.716011592225239e-05,
1823
- "loss": 3.0847,
1824
- "step": 146500
1825
- },
1826
- {
1827
- "epoch": 3.64,
1828
- "learning_rate": 7.714086685131975e-05,
1829
- "loss": 3.1536,
1830
- "step": 147000
1831
- },
1832
- {
1833
- "epoch": 3.66,
1834
- "learning_rate": 7.712155518186792e-05,
1835
- "loss": 3.0908,
1836
- "step": 147500
1837
- },
1838
- {
1839
- "epoch": 3.67,
1840
- "learning_rate": 7.710218094644548e-05,
1841
- "loss": 3.0379,
1842
- "step": 148000
1843
- },
1844
- {
1845
- "epoch": 3.68,
1846
- "learning_rate": 7.708274417770644e-05,
1847
- "loss": 3.0513,
1848
- "step": 148500
1849
- },
1850
- {
1851
- "epoch": 3.69,
1852
- "learning_rate": 7.706324490841019e-05,
1853
- "loss": 3.1075,
1854
- "step": 149000
1855
- },
1856
- {
1857
- "epoch": 3.71,
1858
- "learning_rate": 7.704368317142151e-05,
1859
- "loss": 3.1261,
1860
- "step": 149500
1861
- },
1862
- {
1863
- "epoch": 3.72,
1864
- "learning_rate": 7.702405899971042e-05,
1865
- "loss": 3.164,
1866
- "step": 150000
1867
- },
1868
- {
1869
- "epoch": 3.73,
1870
- "learning_rate": 7.700437242635218e-05,
1871
- "loss": 3.0038,
1872
- "step": 150500
1873
- },
1874
- {
1875
- "epoch": 3.74,
1876
- "learning_rate": 7.698462348452724e-05,
1877
- "loss": 3.1165,
1878
- "step": 151000
1879
- },
1880
- {
1881
- "epoch": 3.75,
1882
- "learning_rate": 7.696481220752119e-05,
1883
- "loss": 3.048,
1884
- "step": 151500
1885
- },
1886
- {
1887
- "epoch": 3.77,
1888
- "learning_rate": 7.694493862872459e-05,
1889
- "loss": 3.0922,
1890
- "step": 152000
1891
- },
1892
- {
1893
- "epoch": 3.78,
1894
- "learning_rate": 7.69250027816331e-05,
1895
- "loss": 3.1324,
1896
- "step": 152500
1897
- },
1898
- {
1899
- "epoch": 3.79,
1900
- "learning_rate": 7.69050046998473e-05,
1901
- "loss": 3.1076,
1902
- "step": 153000
1903
- },
1904
- {
1905
- "epoch": 3.8,
1906
- "learning_rate": 7.688494441707267e-05,
1907
- "loss": 3.0944,
1908
- "step": 153500
1909
- },
1910
- {
1911
- "epoch": 3.82,
1912
- "learning_rate": 7.686482196711948e-05,
1913
- "loss": 3.0502,
1914
- "step": 154000
1915
- },
1916
- {
1917
- "epoch": 3.83,
1918
- "learning_rate": 7.684463738390284e-05,
1919
- "loss": 3.0757,
1920
- "step": 154500
1921
- },
1922
- {
1923
- "epoch": 3.84,
1924
- "learning_rate": 7.682439070144252e-05,
1925
- "loss": 3.0951,
1926
- "step": 155000
1927
- },
1928
- {
1929
- "epoch": 3.85,
1930
- "learning_rate": 7.680408195386303e-05,
1931
- "loss": 3.0857,
1932
- "step": 155500
1933
- },
1934
- {
1935
- "epoch": 3.87,
1936
- "learning_rate": 7.678371117539342e-05,
1937
- "loss": 3.0341,
1938
- "step": 156000
1939
- },
1940
- {
1941
- "epoch": 3.88,
1942
- "learning_rate": 7.676327840036733e-05,
1943
- "loss": 3.0872,
1944
- "step": 156500
1945
- },
1946
- {
1947
- "epoch": 3.89,
1948
- "learning_rate": 7.674278366322286e-05,
1949
- "loss": 3.0744,
1950
- "step": 157000
1951
- },
1952
- {
1953
- "epoch": 3.9,
1954
- "learning_rate": 7.672222699850256e-05,
1955
- "loss": 3.0927,
1956
- "step": 157500
1957
- },
1958
- {
1959
- "epoch": 3.92,
1960
- "learning_rate": 7.670160844085336e-05,
1961
- "loss": 3.0717,
1962
- "step": 158000
1963
- },
1964
- {
1965
- "epoch": 3.93,
1966
- "learning_rate": 7.668092802502651e-05,
1967
- "loss": 2.964,
1968
- "step": 158500
1969
- },
1970
- {
1971
- "epoch": 3.94,
1972
- "learning_rate": 7.666018578587749e-05,
1973
- "loss": 2.9958,
1974
- "step": 159000
1975
- },
1976
- {
1977
- "epoch": 3.95,
1978
- "learning_rate": 7.663938175836599e-05,
1979
- "loss": 3.1135,
1980
- "step": 159500
1981
- },
1982
- {
1983
- "epoch": 3.97,
1984
- "learning_rate": 7.661851597755588e-05,
1985
- "loss": 3.0903,
1986
- "step": 160000
1987
- },
1988
- {
1989
- "epoch": 3.98,
1990
- "learning_rate": 7.659758847861505e-05,
1991
- "loss": 3.1399,
1992
- "step": 160500
1993
- },
1994
- {
1995
- "epoch": 3.99,
1996
- "learning_rate": 7.657659929681545e-05,
1997
- "loss": 3.0583,
1998
- "step": 161000
1999
- },
2000
- {
2001
- "epoch": 4.0,
2002
- "eval_bleu": 1.0,
2003
- "eval_brevity_penalty": 1.0,
2004
- "eval_length_ratio": 1.0,
2005
- "eval_loss": 3.0066018104553223,
2006
- "eval_precisions": [
2007
- 1.0,
2008
- 1.0,
2009
- 1.0,
2010
- 1.0
2011
- ],
2012
- "eval_reference_length": 4591104,
2013
- "eval_runtime": 8243.7716,
2014
- "eval_samples_per_second": 1.088,
2015
- "eval_steps_per_second": 0.544,
2016
- "eval_translation_length": 4591104,
2017
- "step": 161392
2018
- },
2019
- {
2020
- "epoch": 4.0,
2021
- "learning_rate": 7.655554846753297e-05,
2022
- "loss": 3.1442,
2023
- "step": 161500
2024
- },
2025
- {
2026
- "epoch": 4.02,
2027
- "learning_rate": 7.653443602624745e-05,
2028
- "loss": 3.079,
2029
- "step": 162000
2030
- },
2031
- {
2032
- "epoch": 4.03,
2033
- "learning_rate": 7.651326200854252e-05,
2034
- "loss": 3.1155,
2035
- "step": 162500
2036
- },
2037
- {
2038
- "epoch": 4.04,
2039
- "learning_rate": 7.64920264501056e-05,
2040
- "loss": 3.034,
2041
- "step": 163000
2042
- },
2043
- {
2044
- "epoch": 4.05,
2045
- "learning_rate": 7.647072938672785e-05,
2046
- "loss": 3.1226,
2047
- "step": 163500
2048
- },
2049
- {
2050
- "epoch": 4.06,
2051
- "learning_rate": 7.644937085430409e-05,
2052
- "loss": 3.1027,
2053
- "step": 164000
2054
- },
2055
- {
2056
- "epoch": 4.08,
2057
- "learning_rate": 7.642795088883274e-05,
2058
- "loss": 3.1736,
2059
- "step": 164500
2060
- },
2061
- {
2062
- "epoch": 4.09,
2063
- "learning_rate": 7.640646952641577e-05,
2064
- "loss": 2.9279,
2065
- "step": 165000
2066
- },
2067
- {
2068
- "epoch": 4.1,
2069
- "learning_rate": 7.638492680325862e-05,
2070
- "loss": 3.0596,
2071
- "step": 165500
2072
- },
2073
- {
2074
- "epoch": 4.11,
2075
- "learning_rate": 7.636332275567012e-05,
2076
- "loss": 3.0027,
2077
- "step": 166000
2078
- },
2079
- {
2080
- "epoch": 4.13,
2081
- "learning_rate": 7.634165742006251e-05,
2082
- "loss": 3.1487,
2083
- "step": 166500
2084
- },
2085
- {
2086
- "epoch": 4.14,
2087
- "learning_rate": 7.631993083295134e-05,
2088
- "loss": 3.04,
2089
- "step": 167000
2090
- },
2091
- {
2092
- "epoch": 4.15,
2093
- "learning_rate": 7.62981430309553e-05,
2094
- "loss": 3.0355,
2095
- "step": 167500
2096
- },
2097
- {
2098
- "epoch": 4.16,
2099
- "learning_rate": 7.627629405079637e-05,
2100
- "loss": 3.1222,
2101
- "step": 168000
2102
- },
2103
- {
2104
- "epoch": 4.18,
2105
- "learning_rate": 7.625438392929956e-05,
2106
- "loss": 3.0185,
2107
- "step": 168500
2108
- },
2109
- {
2110
- "epoch": 4.19,
2111
- "learning_rate": 7.623241270339294e-05,
2112
- "loss": 3.036,
2113
- "step": 169000
2114
- },
2115
- {
2116
- "epoch": 4.2,
2117
- "learning_rate": 7.621038041010763e-05,
2118
- "loss": 3.0182,
2119
- "step": 169500
2120
- },
2121
- {
2122
- "epoch": 4.21,
2123
- "learning_rate": 7.61882870865776e-05,
2124
- "loss": 3.0501,
2125
- "step": 170000
2126
- },
2127
- {
2128
- "epoch": 4.23,
2129
- "learning_rate": 7.61661327700397e-05,
2130
- "loss": 3.0935,
2131
- "step": 170500
2132
- },
2133
- {
2134
- "epoch": 4.24,
2135
- "learning_rate": 7.614391749783361e-05,
2136
- "loss": 2.9745,
2137
- "step": 171000
2138
- },
2139
- {
2140
- "epoch": 4.25,
2141
- "learning_rate": 7.612164130740175e-05,
2142
- "loss": 3.0497,
2143
- "step": 171500
2144
- },
2145
- {
2146
- "epoch": 4.26,
2147
- "learning_rate": 7.609930423628915e-05,
2148
- "loss": 3.0207,
2149
- "step": 172000
2150
- },
2151
- {
2152
- "epoch": 4.28,
2153
- "learning_rate": 7.607690632214351e-05,
2154
- "loss": 3.0059,
2155
- "step": 172500
2156
- },
2157
- {
2158
- "epoch": 4.29,
2159
- "learning_rate": 7.605444760271507e-05,
2160
- "loss": 3.113,
2161
- "step": 173000
2162
- },
2163
- {
2164
- "epoch": 4.3,
2165
- "learning_rate": 7.603192811585654e-05,
2166
- "loss": 3.0989,
2167
- "step": 173500
2168
- },
2169
- {
2170
- "epoch": 4.31,
2171
- "learning_rate": 7.600934789952304e-05,
2172
- "loss": 3.1174,
2173
- "step": 174000
2174
- },
2175
- {
2176
- "epoch": 4.32,
2177
- "learning_rate": 7.598670699177207e-05,
2178
- "loss": 3.0884,
2179
- "step": 174500
2180
- },
2181
- {
2182
- "epoch": 4.34,
2183
- "learning_rate": 7.596400543076339e-05,
2184
- "loss": 2.973,
2185
- "step": 175000
2186
- },
2187
- {
2188
- "epoch": 4.35,
2189
- "learning_rate": 7.594124325475904e-05,
2190
- "loss": 3.0956,
2191
- "step": 175500
2192
- },
2193
- {
2194
- "epoch": 4.36,
2195
- "learning_rate": 7.591842050212317e-05,
2196
- "loss": 2.9274,
2197
- "step": 176000
2198
- },
2199
- {
2200
- "epoch": 4.37,
2201
- "learning_rate": 7.589553721132205e-05,
2202
- "loss": 3.0128,
2203
- "step": 176500
2204
- },
2205
- {
2206
- "epoch": 4.39,
2207
- "learning_rate": 7.587259342092397e-05,
2208
- "loss": 3.0429,
2209
- "step": 177000
2210
- },
2211
- {
2212
- "epoch": 4.4,
2213
- "learning_rate": 7.584958916959923e-05,
2214
- "loss": 3.0955,
2215
- "step": 177500
2216
- },
2217
- {
2218
- "epoch": 4.41,
2219
- "learning_rate": 7.582652449611996e-05,
2220
- "loss": 3.1124,
2221
- "step": 178000
2222
- },
2223
- {
2224
- "epoch": 4.42,
2225
- "learning_rate": 7.58033994393602e-05,
2226
- "loss": 2.9723,
2227
- "step": 178500
2228
- },
2229
- {
2230
- "epoch": 4.44,
2231
- "learning_rate": 7.578021403829572e-05,
2232
- "loss": 2.981,
2233
- "step": 179000
2234
- },
2235
- {
2236
- "epoch": 4.45,
2237
- "learning_rate": 7.5756968332004e-05,
2238
- "loss": 3.1174,
2239
- "step": 179500
2240
- },
2241
- {
2242
- "epoch": 4.46,
2243
- "learning_rate": 7.57336623596642e-05,
2244
- "loss": 3.0292,
2245
- "step": 180000
2246
- },
2247
- {
2248
- "epoch": 4.47,
2249
- "learning_rate": 7.5710296160557e-05,
2250
- "loss": 3.0106,
2251
- "step": 180500
2252
- },
2253
- {
2254
- "epoch": 4.49,
2255
- "learning_rate": 7.568686977406459e-05,
2256
- "loss": 2.9749,
2257
- "step": 181000
2258
- },
2259
- {
2260
- "epoch": 4.5,
2261
- "learning_rate": 7.566338323967065e-05,
2262
- "loss": 3.0591,
2263
- "step": 181500
2264
- },
2265
- {
2266
- "epoch": 4.51,
2267
- "learning_rate": 7.563983659696022e-05,
2268
- "loss": 3.0228,
2269
- "step": 182000
2270
- },
2271
- {
2272
- "epoch": 4.52,
2273
- "learning_rate": 7.56162298856196e-05,
2274
- "loss": 3.0134,
2275
- "step": 182500
2276
- },
2277
- {
2278
- "epoch": 4.54,
2279
- "learning_rate": 7.559256314543639e-05,
2280
- "loss": 3.0624,
2281
- "step": 183000
2282
- },
2283
- {
2284
- "epoch": 4.55,
2285
- "learning_rate": 7.556883641629936e-05,
2286
- "loss": 3.0205,
2287
- "step": 183500
2288
- },
2289
- {
2290
- "epoch": 4.56,
2291
- "learning_rate": 7.554504973819835e-05,
2292
- "loss": 3.0067,
2293
- "step": 184000
2294
- },
2295
- {
2296
- "epoch": 4.57,
2297
- "learning_rate": 7.552120315122426e-05,
2298
- "loss": 3.0488,
2299
- "step": 184500
2300
- },
2301
- {
2302
- "epoch": 4.59,
2303
- "learning_rate": 7.549729669556898e-05,
2304
- "loss": 3.0992,
2305
- "step": 185000
2306
- },
2307
- {
2308
- "epoch": 4.6,
2309
- "learning_rate": 7.547333041152526e-05,
2310
- "loss": 3.0137,
2311
- "step": 185500
2312
- },
2313
- {
2314
- "epoch": 4.61,
2315
- "learning_rate": 7.544930433948676e-05,
2316
- "loss": 3.1379,
2317
- "step": 186000
2318
- },
2319
- {
2320
- "epoch": 4.62,
2321
- "learning_rate": 7.542521851994781e-05,
2322
- "loss": 3.0818,
2323
- "step": 186500
2324
- },
2325
- {
2326
- "epoch": 4.63,
2327
- "learning_rate": 7.540107299350354e-05,
2328
- "loss": 3.0634,
2329
- "step": 187000
2330
- },
2331
- {
2332
- "epoch": 4.65,
2333
- "learning_rate": 7.537686780084966e-05,
2334
- "loss": 3.0984,
2335
- "step": 187500
2336
- },
2337
- {
2338
- "epoch": 4.66,
2339
- "learning_rate": 7.53526029827824e-05,
2340
- "loss": 3.1238,
2341
- "step": 188000
2342
- },
2343
- {
2344
- "epoch": 4.67,
2345
- "learning_rate": 7.532827858019862e-05,
2346
- "loss": 3.0431,
2347
- "step": 188500
2348
- },
2349
- {
2350
- "epoch": 4.68,
2351
- "learning_rate": 7.530389463409545e-05,
2352
- "loss": 3.0216,
2353
- "step": 189000
2354
- },
2355
- {
2356
- "epoch": 4.7,
2357
- "learning_rate": 7.527945118557048e-05,
2358
- "loss": 3.0448,
2359
- "step": 189500
2360
- },
2361
- {
2362
- "epoch": 4.71,
2363
- "learning_rate": 7.525494827582155e-05,
2364
- "loss": 3.1713,
2365
- "step": 190000
2366
- },
2367
- {
2368
- "epoch": 4.72,
2369
- "learning_rate": 7.523038594614671e-05,
2370
- "loss": 3.0396,
2371
- "step": 190500
2372
- },
2373
- {
2374
- "epoch": 4.73,
2375
- "learning_rate": 7.52057642379442e-05,
2376
- "loss": 3.1008,
2377
- "step": 191000
2378
- },
2379
- {
2380
- "epoch": 4.75,
2381
- "learning_rate": 7.518108319271228e-05,
2382
- "loss": 3.0965,
2383
- "step": 191500
2384
- },
2385
- {
2386
- "epoch": 4.76,
2387
- "learning_rate": 7.515634285204928e-05,
2388
- "loss": 3.0407,
2389
- "step": 192000
2390
- },
2391
- {
2392
- "epoch": 4.77,
2393
- "learning_rate": 7.51315432576534e-05,
2394
- "loss": 3.0669,
2395
- "step": 192500
2396
- },
2397
- {
2398
- "epoch": 4.78,
2399
- "learning_rate": 7.510668445132279e-05,
2400
- "loss": 3.0752,
2401
- "step": 193000
2402
- },
2403
- {
2404
- "epoch": 4.8,
2405
- "learning_rate": 7.508176647495532e-05,
2406
- "loss": 2.9414,
2407
- "step": 193500
2408
- },
2409
- {
2410
- "epoch": 4.81,
2411
- "learning_rate": 7.505678937054863e-05,
2412
- "loss": 3.0746,
2413
- "step": 194000
2414
- },
2415
- {
2416
- "epoch": 4.82,
2417
- "learning_rate": 7.503175318019999e-05,
2418
- "loss": 2.9645,
2419
- "step": 194500
2420
- },
2421
- {
2422
- "epoch": 4.83,
2423
- "learning_rate": 7.500665794610632e-05,
2424
- "loss": 3.0942,
2425
- "step": 195000
2426
- },
2427
- {
2428
- "epoch": 4.85,
2429
- "learning_rate": 7.498150371056396e-05,
2430
- "loss": 3.0586,
2431
- "step": 195500
2432
- },
2433
- {
2434
- "epoch": 4.86,
2435
- "learning_rate": 7.495629051596876e-05,
2436
- "loss": 3.0251,
2437
- "step": 196000
2438
- },
2439
- {
2440
- "epoch": 4.87,
2441
- "learning_rate": 7.493101840481594e-05,
2442
- "loss": 3.0769,
2443
- "step": 196500
2444
- },
2445
- {
2446
- "epoch": 4.88,
2447
- "learning_rate": 7.490568741969997e-05,
2448
- "loss": 3.0701,
2449
- "step": 197000
2450
- },
2451
- {
2452
- "epoch": 4.89,
2453
- "learning_rate": 7.488029760331459e-05,
2454
- "loss": 3.0893,
2455
- "step": 197500
2456
- },
2457
- {
2458
- "epoch": 4.91,
2459
- "learning_rate": 7.48548489984527e-05,
2460
- "loss": 3.1527,
2461
- "step": 198000
2462
- },
2463
- {
2464
- "epoch": 4.92,
2465
- "learning_rate": 7.482934164800626e-05,
2466
- "loss": 3.0497,
2467
- "step": 198500
2468
- },
2469
- {
2470
- "epoch": 4.93,
2471
- "learning_rate": 7.480377559496624e-05,
2472
- "loss": 3.0665,
2473
- "step": 199000
2474
- },
2475
- {
2476
- "epoch": 4.94,
2477
- "learning_rate": 7.47781508824226e-05,
2478
- "loss": 3.0842,
2479
- "step": 199500
2480
- },
2481
- {
2482
- "epoch": 4.96,
2483
- "learning_rate": 7.47524675535641e-05,
2484
- "loss": 3.0961,
2485
- "step": 200000
2486
- },
2487
- {
2488
- "epoch": 4.97,
2489
- "learning_rate": 7.472672565167833e-05,
2490
- "loss": 3.0756,
2491
- "step": 200500
2492
- },
2493
- {
2494
- "epoch": 4.98,
2495
- "learning_rate": 7.470092522015158e-05,
2496
- "loss": 3.1524,
2497
- "step": 201000
2498
- },
2499
- {
2500
- "epoch": 4.99,
2501
- "learning_rate": 7.467506630246882e-05,
2502
- "loss": 3.1614,
2503
- "step": 201500
2504
- },
2505
- {
2506
- "epoch": 5.0,
2507
  "eval_bleu": 1.0,
2508
  "eval_brevity_penalty": 1.0,
2509
  "eval_length_ratio": 1.0,
2510
- "eval_loss": 2.995251417160034,
2511
  "eval_precisions": [
2512
  1.0,
2513
  1.0,
2514
  1.0,
2515
  1.0
2516
  ],
2517
- "eval_reference_length": 4591104,
2518
- "eval_runtime": 8351.4463,
2519
- "eval_samples_per_second": 1.074,
2520
- "eval_steps_per_second": 0.537,
2521
- "eval_translation_length": 4591104,
2522
- "step": 201740
2523
  }
2524
  ],
2525
  "logging_steps": 500,
2526
- "max_steps": 1210440,
2527
  "num_train_epochs": 30,
2528
  "save_steps": 1000,
2529
- "total_flos": 3.462342804976435e+18,
2530
  "trial_name": null,
2531
  "trial_params": null
2532
  }
 
1
  {
2
+ "best_metric": 2.873922109603882,
3
+ "best_model_checkpoint": "dq158/coqui/checkpoint-40162",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 40162,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 8e-05,
14
+ "loss": 2.9713,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.02,
19
+ "learning_rate": 7.99999659782192e-05,
20
+ "loss": 2.9968,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.04,
25
+ "learning_rate": 7.999986391293467e-05,
26
+ "loss": 3.1051,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.05,
31
+ "learning_rate": 7.999969380432003e-05,
32
+ "loss": 3.0623,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.06,
37
+ "learning_rate": 7.999945565266465e-05,
38
+ "loss": 3.0735,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.07,
43
+ "learning_rate": 7.999914945837365e-05,
44
+ "loss": 3.0149,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.09,
49
+ "learning_rate": 7.999877522196789e-05,
50
+ "loss": 2.9847,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.1,
55
+ "learning_rate": 7.999833294408398e-05,
56
+ "loss": 3.1534,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.11,
61
+ "learning_rate": 7.999782262547429e-05,
62
+ "loss": 2.9796,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.12,
67
+ "learning_rate": 7.999724426700689e-05,
68
+ "loss": 2.984,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.14,
73
+ "learning_rate": 7.999659786966562e-05,
74
+ "loss": 2.9924,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.15,
79
+ "learning_rate": 7.99958834345501e-05,
80
+ "loss": 2.9193,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 0.16,
85
+ "learning_rate": 7.99951009628756e-05,
86
+ "loss": 3.0217,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 0.17,
91
+ "learning_rate": 7.999425045597321e-05,
92
+ "loss": 3.1339,
93
  "step": 7000
94
  },
95
  {
96
  "epoch": 0.19,
97
+ "learning_rate": 7.999333191528969e-05,
98
+ "loss": 3.0685,
99
  "step": 7500
100
  },
101
  {
102
  "epoch": 0.2,
103
+ "learning_rate": 7.999234534238758e-05,
104
+ "loss": 3.0571,
105
  "step": 8000
106
  },
107
  {
108
  "epoch": 0.21,
109
+ "learning_rate": 7.999129073894513e-05,
110
+ "loss": 3.1052,
111
  "step": 8500
112
  },
113
  {
114
  "epoch": 0.22,
115
+ "learning_rate": 7.999016810675628e-05,
116
+ "loss": 3.052,
117
  "step": 9000
118
  },
119
  {
120
  "epoch": 0.24,
121
+ "learning_rate": 7.998897744773075e-05,
122
+ "loss": 3.0529,
123
  "step": 9500
124
  },
125
  {
126
  "epoch": 0.25,
127
+ "learning_rate": 7.998771876389398e-05,
128
+ "loss": 3.1207,
129
  "step": 10000
130
  },
131
  {
132
  "epoch": 0.26,
133
+ "learning_rate": 7.998639205738706e-05,
134
+ "loss": 3.0854,
135
  "step": 10500
136
  },
137
  {
138
  "epoch": 0.27,
139
+ "learning_rate": 7.998499733046688e-05,
140
+ "loss": 3.0566,
141
  "step": 11000
142
  },
143
  {
144
  "epoch": 0.29,
145
+ "learning_rate": 7.998353458550596e-05,
146
+ "loss": 3.0659,
147
  "step": 11500
148
  },
149
  {
150
  "epoch": 0.3,
151
+ "learning_rate": 7.998200382499256e-05,
152
+ "loss": 2.9975,
153
  "step": 12000
154
  },
155
  {
156
  "epoch": 0.31,
157
+ "learning_rate": 7.998040505153066e-05,
158
+ "loss": 3.0442,
159
  "step": 12500
160
  },
161
  {
162
  "epoch": 0.32,
163
+ "learning_rate": 7.997873826783991e-05,
164
+ "loss": 3.0385,
165
  "step": 13000
166
  },
167
  {
168
+ "epoch": 0.34,
169
+ "learning_rate": 7.997700347675568e-05,
170
+ "loss": 3.0444,
171
  "step": 13500
172
  },
173
  {
174
  "epoch": 0.35,
175
+ "learning_rate": 7.997520068122894e-05,
176
+ "loss": 3.042,
177
  "step": 14000
178
  },
179
  {
180
  "epoch": 0.36,
181
+ "learning_rate": 7.997332988432647e-05,
182
+ "loss": 3.0603,
183
  "step": 14500
184
  },
185
  {
186
  "epoch": 0.37,
187
+ "learning_rate": 7.997139108923062e-05,
188
+ "loss": 3.0842,
189
  "step": 15000
190
  },
191
  {
192
+ "epoch": 0.39,
193
+ "learning_rate": 7.996938429923948e-05,
194
+ "loss": 3.0216,
195
  "step": 15500
196
  },
197
  {
198
  "epoch": 0.4,
199
+ "learning_rate": 7.996730951776675e-05,
200
+ "loss": 2.977,
201
  "step": 16000
202
  },
203
  {
204
  "epoch": 0.41,
205
+ "learning_rate": 7.996516674834186e-05,
206
+ "loss": 3.1044,
207
  "step": 16500
208
  },
209
  {
210
  "epoch": 0.42,
211
+ "learning_rate": 7.996295599460983e-05,
212
+ "loss": 3.0355,
213
  "step": 17000
214
  },
215
  {
216
+ "epoch": 0.44,
217
+ "learning_rate": 7.996067726033133e-05,
218
+ "loss": 2.9189,
219
  "step": 17500
220
  },
221
  {
222
  "epoch": 0.45,
223
+ "learning_rate": 7.99583305493827e-05,
224
+ "loss": 3.0116,
225
  "step": 18000
226
  },
227
  {
228
  "epoch": 0.46,
229
+ "learning_rate": 7.995591586575593e-05,
230
+ "loss": 2.924,
231
  "step": 18500
232
  },
233
  {
234
  "epoch": 0.47,
235
+ "learning_rate": 7.99534332135586e-05,
236
+ "loss": 2.9726,
237
  "step": 19000
238
  },
239
  {
240
+ "epoch": 0.49,
241
+ "learning_rate": 7.99508825970139e-05,
242
+ "loss": 3.0505,
243
  "step": 19500
244
  },
245
  {
246
  "epoch": 0.5,
247
+ "learning_rate": 7.994826402046067e-05,
248
+ "loss": 3.1109,
249
  "step": 20000
250
  },
251
  {
252
  "epoch": 0.51,
253
+ "learning_rate": 7.994557748835336e-05,
254
+ "loss": 3.0032,
255
  "step": 20500
256
  },
257
  {
258
  "epoch": 0.52,
259
+ "learning_rate": 7.994282300526196e-05,
260
+ "loss": 2.952,
261
  "step": 21000
262
  },
263
  {
264
+ "epoch": 0.54,
265
+ "learning_rate": 7.994000057587214e-05,
266
+ "loss": 3.0557,
267
  "step": 21500
268
  },
269
  {
270
  "epoch": 0.55,
271
+ "learning_rate": 7.993711020498506e-05,
272
+ "loss": 3.0012,
273
  "step": 22000
274
  },
275
  {
276
  "epoch": 0.56,
277
+ "learning_rate": 7.993415189751751e-05,
278
+ "loss": 3.0834,
279
  "step": 22500
280
  },
281
  {
282
  "epoch": 0.57,
283
+ "learning_rate": 7.993112565850186e-05,
284
+ "loss": 3.1128,
285
  "step": 23000
286
  },
287
  {
288
+ "epoch": 0.59,
289
+ "learning_rate": 7.992803149308598e-05,
290
+ "loss": 2.9702,
291
  "step": 23500
292
  },
293
  {
294
+ "epoch": 0.6,
295
+ "learning_rate": 7.992486940653335e-05,
296
+ "loss": 2.9286,
297
  "step": 24000
298
  },
299
  {
300
  "epoch": 0.61,
301
+ "learning_rate": 7.992163940422294e-05,
302
+ "loss": 3.0021,
303
  "step": 24500
304
  },
305
  {
306
  "epoch": 0.62,
307
+ "learning_rate": 7.991834149164927e-05,
308
+ "loss": 3.0641,
309
  "step": 25000
310
  },
311
  {
312
  "epoch": 0.63,
313
+ "learning_rate": 7.991497567442239e-05,
314
+ "loss": 3.0452,
315
  "step": 25500
316
  },
317
  {
318
+ "epoch": 0.65,
319
+ "learning_rate": 7.991154195826784e-05,
320
+ "loss": 3.1197,
321
  "step": 26000
322
  },
323
  {
324
  "epoch": 0.66,
325
+ "learning_rate": 7.99080403490267e-05,
326
+ "loss": 2.9332,
327
  "step": 26500
328
  },
329
  {
330
  "epoch": 0.67,
331
+ "learning_rate": 7.990447085265552e-05,
332
+ "loss": 2.9659,
333
  "step": 27000
334
  },
335
  {
336
  "epoch": 0.68,
337
+ "learning_rate": 7.990083347522633e-05,
338
+ "loss": 3.0604,
339
  "step": 27500
340
  },
341
  {
342
+ "epoch": 0.7,
343
+ "learning_rate": 7.98971282229266e-05,
344
+ "loss": 3.0265,
345
  "step": 28000
346
  },
347
  {
348
  "epoch": 0.71,
349
+ "learning_rate": 7.989335510205932e-05,
350
+ "loss": 3.0863,
351
  "step": 28500
352
  },
353
  {
354
  "epoch": 0.72,
355
+ "learning_rate": 7.98895141190429e-05,
356
+ "loss": 3.0246,
357
  "step": 29000
358
  },
359
  {
360
  "epoch": 0.73,
361
+ "learning_rate": 7.988560528041123e-05,
362
+ "loss": 3.068,
363
  "step": 29500
364
  },
365
  {
366
+ "epoch": 0.75,
367
+ "learning_rate": 7.988162859281352e-05,
368
+ "loss": 3.0201,
369
  "step": 30000
370
  },
371
  {
372
  "epoch": 0.76,
373
+ "learning_rate": 7.987758406301453e-05,
374
+ "loss": 2.9901,
375
  "step": 30500
376
  },
377
  {
378
  "epoch": 0.77,
379
+ "learning_rate": 7.987347169789434e-05,
380
+ "loss": 3.0216,
381
  "step": 31000
382
  },
383
  {
384
  "epoch": 0.78,
385
+ "learning_rate": 7.986929150444845e-05,
386
+ "loss": 2.9896,
387
  "step": 31500
388
  },
389
  {
390
+ "epoch": 0.8,
391
+ "learning_rate": 7.986504348978775e-05,
392
+ "loss": 2.9584,
393
  "step": 32000
394
  },
395
  {
396
  "epoch": 0.81,
397
+ "learning_rate": 7.986072766113848e-05,
398
+ "loss": 3.1194,
399
  "step": 32500
400
  },
401
  {
402
  "epoch": 0.82,
403
+ "learning_rate": 7.985634402584225e-05,
404
+ "loss": 3.0539,
405
  "step": 33000
406
  },
407
  {
408
  "epoch": 0.83,
409
+ "learning_rate": 7.985189259135603e-05,
410
+ "loss": 3.025,
411
  "step": 33500
412
  },
413
  {
414
+ "epoch": 0.85,
415
+ "learning_rate": 7.98473733652521e-05,
416
+ "loss": 3.0168,
417
  "step": 34000
418
  },
419
  {
420
  "epoch": 0.86,
421
+ "learning_rate": 7.984278635521804e-05,
422
+ "loss": 3.0642,
423
  "step": 34500
424
  },
425
  {
426
  "epoch": 0.87,
427
+ "learning_rate": 7.98381315690568e-05,
428
+ "loss": 3.1025,
429
  "step": 35000
430
  },
431
  {
432
  "epoch": 0.88,
433
+ "learning_rate": 7.983340901468657e-05,
434
+ "loss": 2.9948,
435
  "step": 35500
436
  },
437
  {
438
+ "epoch": 0.9,
439
+ "learning_rate": 7.982861870014082e-05,
440
+ "loss": 3.1125,
441
  "step": 36000
442
  },
443
  {
444
+ "epoch": 0.91,
445
+ "learning_rate": 7.982376063356834e-05,
446
+ "loss": 2.9866,
447
  "step": 36500
448
  },
449
  {
450
  "epoch": 0.92,
451
+ "learning_rate": 7.981883482323309e-05,
452
+ "loss": 3.0354,
453
  "step": 37000
454
  },
455
  {
456
  "epoch": 0.93,
457
+ "learning_rate": 7.981384127751434e-05,
458
+ "loss": 3.0398,
459
  "step": 37500
460
  },
461
  {
462
+ "epoch": 0.95,
463
+ "learning_rate": 7.980878000490655e-05,
464
+ "loss": 2.9966,
465
  "step": 38000
466
  },
467
  {
468
+ "epoch": 0.96,
469
+ "learning_rate": 7.98036510140194e-05,
470
+ "loss": 3.003,
471
  "step": 38500
472
  },
473
  {
474
  "epoch": 0.97,
475
+ "learning_rate": 7.979845431357774e-05,
476
+ "loss": 3.0215,
477
  "step": 39000
478
  },
479
  {
480
  "epoch": 0.98,
481
+ "learning_rate": 7.979318991242163e-05,
482
+ "loss": 3.0306,
483
  "step": 39500
484
  },
 
 
 
 
 
 
485
  {
486
  "epoch": 1.0,
487
+ "learning_rate": 7.978785781950629e-05,
488
+ "loss": 3.1022,
489
+ "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  },
491
  {
492
  "epoch": 1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  "eval_bleu": 1.0,
494
  "eval_brevity_penalty": 1.0,
495
  "eval_length_ratio": 1.0,
496
+ "eval_loss": 2.873922109603882,
497
  "eval_precisions": [
498
  1.0,
499
  1.0,
500
  1.0,
501
  1.0
502
  ],
503
+ "eval_reference_length": 4569600,
504
+ "eval_runtime": 7627.4866,
505
+ "eval_samples_per_second": 1.17,
506
+ "eval_steps_per_second": 0.585,
507
+ "eval_translation_length": 4569600,
508
+ "step": 40162
509
  }
510
  ],
511
  "logging_steps": 500,
512
+ "max_steps": 1204860,
513
  "num_train_epochs": 30,
514
  "save_steps": 1000,
515
+ "total_flos": 6.892848961321697e+17,
516
  "trial_name": null,
517
  "trial_params": null
518
  }