abdiharyadi committed on
Commit
5ba1380
1 Parent(s): ae35b0c

Training in progress, step 14400, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7e207271ed9fc29b099863defab32d4952b4913bf12a40b3e58c16c30ace3354
+ oid sha256:2a9d10b584cec8273eca5d86cc6016f2c25559a476d52f85256fa198af4ac99a
  size 1575259780
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fef1272e9932d85f4a0434d895c0520855facd99c96206dd3c6f6de6dc78ba1a
+ oid sha256:fb7691fcf78fced887666a81c8e060bbb605df22f8bcaf5b9764dddfc2c9cb5d
  size 3150397656
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:725551c540c5935ca09ad257e1ec602a9cde53567df7545ad488987db5f81d2a
+ oid sha256:828235e6c6c3e1c9eda9bb230976f45c0d41cf99d80a5249dafb40a90914235b
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f7a14dd5679ca365b7036c52129c5a5a2c8f2459be0b4d29ecb21c4b219dee84
+ oid sha256:7489f778f4930a259c1c1d497136232a14b3ffe1be08fc49fc9ca0bfcb310542
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 49.121,
- "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-10800",
- "epoch": 119.20529801324503,
+ "best_metric": 52.1498,
+ "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-14400",
+ "epoch": 158.94039735099338,
  "eval_steps": 3600,
- "global_step": 10800,
+ "global_step": 14400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3283,6 +3283,1096 @@
3283
  "eval_samples_per_second": 9.255,
3284
  "eval_steps_per_second": 1.851,
3285
  "step": 10800
3286
+ },
3287
+ {
3288
+ "epoch": 119.42604856512142,
3289
+ "learning_rate": 8.334843205574913e-07,
3290
+ "loss": 1.3468,
3291
+ "step": 10820
3292
+ },
3293
+ {
3294
+ "epoch": 119.6467991169978,
3295
+ "learning_rate": 8.331707317073171e-07,
3296
+ "loss": 1.3168,
3297
+ "step": 10840
3298
+ },
3299
+ {
3300
+ "epoch": 119.86754966887418,
3301
+ "learning_rate": 8.328571428571428e-07,
3302
+ "loss": 1.3797,
3303
+ "step": 10860
3304
+ },
3305
+ {
3306
+ "epoch": 120.08830022075055,
3307
+ "learning_rate": 8.325435540069686e-07,
3308
+ "loss": 1.3921,
3309
+ "step": 10880
3310
+ },
3311
+ {
3312
+ "epoch": 120.30905077262693,
3313
+ "learning_rate": 8.322299651567943e-07,
3314
+ "loss": 1.3714,
3315
+ "step": 10900
3316
+ },
3317
+ {
3318
+ "epoch": 120.52980132450331,
3319
+ "learning_rate": 8.319163763066202e-07,
3320
+ "loss": 1.3932,
3321
+ "step": 10920
3322
+ },
3323
+ {
3324
+ "epoch": 120.75055187637969,
3325
+ "learning_rate": 8.316027874564459e-07,
3326
+ "loss": 1.361,
3327
+ "step": 10940
3328
+ },
3329
+ {
3330
+ "epoch": 120.97130242825607,
3331
+ "learning_rate": 8.312891986062718e-07,
3332
+ "loss": 1.3852,
3333
+ "step": 10960
3334
+ },
3335
+ {
3336
+ "epoch": 121.19205298013244,
3337
+ "learning_rate": 8.309756097560976e-07,
3338
+ "loss": 1.3572,
3339
+ "step": 10980
3340
+ },
3341
+ {
3342
+ "epoch": 121.41280353200882,
3343
+ "learning_rate": 8.306620209059233e-07,
3344
+ "loss": 1.368,
3345
+ "step": 11000
3346
+ },
3347
+ {
3348
+ "epoch": 121.63355408388522,
3349
+ "learning_rate": 8.303484320557491e-07,
3350
+ "loss": 1.3301,
3351
+ "step": 11020
3352
+ },
3353
+ {
3354
+ "epoch": 121.8543046357616,
3355
+ "learning_rate": 8.300348432055749e-07,
3356
+ "loss": 1.3618,
3357
+ "step": 11040
3358
+ },
3359
+ {
3360
+ "epoch": 122.07505518763797,
3361
+ "learning_rate": 8.297212543554007e-07,
3362
+ "loss": 1.3663,
3363
+ "step": 11060
3364
+ },
3365
+ {
3366
+ "epoch": 122.29580573951435,
3367
+ "learning_rate": 8.294076655052264e-07,
3368
+ "loss": 1.3595,
3369
+ "step": 11080
3370
+ },
3371
+ {
3372
+ "epoch": 122.51655629139073,
3373
+ "learning_rate": 8.290940766550522e-07,
3374
+ "loss": 1.3469,
3375
+ "step": 11100
3376
+ },
3377
+ {
3378
+ "epoch": 122.7373068432671,
3379
+ "learning_rate": 8.287804878048781e-07,
3380
+ "loss": 1.3225,
3381
+ "step": 11120
3382
+ },
3383
+ {
3384
+ "epoch": 122.95805739514348,
3385
+ "learning_rate": 8.284668989547038e-07,
3386
+ "loss": 1.3757,
3387
+ "step": 11140
3388
+ },
3389
+ {
3390
+ "epoch": 123.17880794701986,
3391
+ "learning_rate": 8.281533101045296e-07,
3392
+ "loss": 1.3782,
3393
+ "step": 11160
3394
+ },
3395
+ {
3396
+ "epoch": 123.39955849889624,
3397
+ "learning_rate": 8.278397212543553e-07,
3398
+ "loss": 1.3417,
3399
+ "step": 11180
3400
+ },
3401
+ {
3402
+ "epoch": 123.62030905077263,
3403
+ "learning_rate": 8.275261324041812e-07,
3404
+ "loss": 1.353,
3405
+ "step": 11200
3406
+ },
3407
+ {
3408
+ "epoch": 123.84105960264901,
3409
+ "learning_rate": 8.272125435540068e-07,
3410
+ "loss": 1.3455,
3411
+ "step": 11220
3412
+ },
3413
+ {
3414
+ "epoch": 124.06181015452539,
3415
+ "learning_rate": 8.268989547038327e-07,
3416
+ "loss": 1.3426,
3417
+ "step": 11240
3418
+ },
3419
+ {
3420
+ "epoch": 124.28256070640177,
3421
+ "learning_rate": 8.265853658536586e-07,
3422
+ "loss": 1.3728,
3423
+ "step": 11260
3424
+ },
3425
+ {
3426
+ "epoch": 124.50331125827815,
3427
+ "learning_rate": 8.262717770034843e-07,
3428
+ "loss": 1.3162,
3429
+ "step": 11280
3430
+ },
3431
+ {
3432
+ "epoch": 124.72406181015452,
3433
+ "learning_rate": 8.259581881533101e-07,
3434
+ "loss": 1.3784,
3435
+ "step": 11300
3436
+ },
3437
+ {
3438
+ "epoch": 124.9448123620309,
3439
+ "learning_rate": 8.256445993031358e-07,
3440
+ "loss": 1.3341,
3441
+ "step": 11320
3442
+ },
3443
+ {
3444
+ "epoch": 125.16556291390728,
3445
+ "learning_rate": 8.253310104529617e-07,
3446
+ "loss": 1.3837,
3447
+ "step": 11340
3448
+ },
3449
+ {
3450
+ "epoch": 125.38631346578366,
3451
+ "learning_rate": 8.250174216027874e-07,
3452
+ "loss": 1.3848,
3453
+ "step": 11360
3454
+ },
3455
+ {
3456
+ "epoch": 125.60706401766005,
3457
+ "learning_rate": 8.247038327526132e-07,
3458
+ "loss": 1.3667,
3459
+ "step": 11380
3460
+ },
3461
+ {
3462
+ "epoch": 125.82781456953643,
3463
+ "learning_rate": 8.24390243902439e-07,
3464
+ "loss": 1.3061,
3465
+ "step": 11400
3466
+ },
3467
+ {
3468
+ "epoch": 126.0485651214128,
3469
+ "learning_rate": 8.240766550522648e-07,
3470
+ "loss": 1.3405,
3471
+ "step": 11420
3472
+ },
3473
+ {
3474
+ "epoch": 126.26931567328919,
3475
+ "learning_rate": 8.237630662020905e-07,
3476
+ "loss": 1.3561,
3477
+ "step": 11440
3478
+ },
3479
+ {
3480
+ "epoch": 126.49006622516556,
3481
+ "learning_rate": 8.234494773519163e-07,
3482
+ "loss": 1.3472,
3483
+ "step": 11460
3484
+ },
3485
+ {
3486
+ "epoch": 126.71081677704194,
3487
+ "learning_rate": 8.23135888501742e-07,
3488
+ "loss": 1.3836,
3489
+ "step": 11480
3490
+ },
3491
+ {
3492
+ "epoch": 126.93156732891832,
3493
+ "learning_rate": 8.228222996515679e-07,
3494
+ "loss": 1.4228,
3495
+ "step": 11500
3496
+ },
3497
+ {
3498
+ "epoch": 127.1523178807947,
3499
+ "learning_rate": 8.225087108013937e-07,
3500
+ "loss": 1.3512,
3501
+ "step": 11520
3502
+ },
3503
+ {
3504
+ "epoch": 127.37306843267108,
3505
+ "learning_rate": 8.221951219512195e-07,
3506
+ "loss": 1.3561,
3507
+ "step": 11540
3508
+ },
3509
+ {
3510
+ "epoch": 127.59381898454747,
3511
+ "learning_rate": 8.218815331010453e-07,
3512
+ "loss": 1.3186,
3513
+ "step": 11560
3514
+ },
3515
+ {
3516
+ "epoch": 127.81456953642385,
3517
+ "learning_rate": 8.215679442508711e-07,
3518
+ "loss": 1.3349,
3519
+ "step": 11580
3520
+ },
3521
+ {
3522
+ "epoch": 128.0353200883002,
3523
+ "learning_rate": 8.212543554006968e-07,
3524
+ "loss": 1.3944,
3525
+ "step": 11600
3526
+ },
3527
+ {
3528
+ "epoch": 128.2560706401766,
3529
+ "learning_rate": 8.209407665505226e-07,
3530
+ "loss": 1.3425,
3531
+ "step": 11620
3532
+ },
3533
+ {
3534
+ "epoch": 128.47682119205297,
3535
+ "learning_rate": 8.206271777003484e-07,
3536
+ "loss": 1.3553,
3537
+ "step": 11640
3538
+ },
3539
+ {
3540
+ "epoch": 128.69757174392936,
3541
+ "learning_rate": 8.203135888501741e-07,
3542
+ "loss": 1.339,
3543
+ "step": 11660
3544
+ },
3545
+ {
3546
+ "epoch": 128.91832229580575,
3547
+ "learning_rate": 8.2e-07,
3548
+ "loss": 1.303,
3549
+ "step": 11680
3550
+ },
3551
+ {
3552
+ "epoch": 129.13907284768212,
3553
+ "learning_rate": 8.196864111498257e-07,
3554
+ "loss": 1.3808,
3555
+ "step": 11700
3556
+ },
3557
+ {
3558
+ "epoch": 129.3598233995585,
3559
+ "learning_rate": 8.193728222996516e-07,
3560
+ "loss": 1.3813,
3561
+ "step": 11720
3562
+ },
3563
+ {
3564
+ "epoch": 129.58057395143487,
3565
+ "learning_rate": 8.190592334494772e-07,
3566
+ "loss": 1.321,
3567
+ "step": 11740
3568
+ },
3569
+ {
3570
+ "epoch": 129.80132450331126,
3571
+ "learning_rate": 8.187456445993031e-07,
3572
+ "loss": 1.3458,
3573
+ "step": 11760
3574
+ },
3575
+ {
3576
+ "epoch": 130.02207505518763,
3577
+ "learning_rate": 8.184320557491288e-07,
3578
+ "loss": 1.3039,
3579
+ "step": 11780
3580
+ },
3581
+ {
3582
+ "epoch": 130.24282560706402,
3583
+ "learning_rate": 8.181184668989547e-07,
3584
+ "loss": 1.3619,
3585
+ "step": 11800
3586
+ },
3587
+ {
3588
+ "epoch": 130.46357615894038,
3589
+ "learning_rate": 8.178048780487805e-07,
3590
+ "loss": 1.3726,
3591
+ "step": 11820
3592
+ },
3593
+ {
3594
+ "epoch": 130.68432671081678,
3595
+ "learning_rate": 8.174912891986062e-07,
3596
+ "loss": 1.3511,
3597
+ "step": 11840
3598
+ },
3599
+ {
3600
+ "epoch": 130.90507726269317,
3601
+ "learning_rate": 8.171777003484321e-07,
3602
+ "loss": 1.3184,
3603
+ "step": 11860
3604
+ },
3605
+ {
3606
+ "epoch": 131.12582781456953,
3607
+ "learning_rate": 8.168641114982578e-07,
3608
+ "loss": 1.3295,
3609
+ "step": 11880
3610
+ },
3611
+ {
3612
+ "epoch": 131.34657836644593,
3613
+ "learning_rate": 8.165505226480836e-07,
3614
+ "loss": 1.3437,
3615
+ "step": 11900
3616
+ },
3617
+ {
3618
+ "epoch": 131.5673289183223,
3619
+ "learning_rate": 8.162369337979093e-07,
3620
+ "loss": 1.3191,
3621
+ "step": 11920
3622
+ },
3623
+ {
3624
+ "epoch": 131.78807947019868,
3625
+ "learning_rate": 8.159233449477352e-07,
3626
+ "loss": 1.3698,
3627
+ "step": 11940
3628
+ },
3629
+ {
3630
+ "epoch": 132.00883002207505,
3631
+ "learning_rate": 8.156097560975609e-07,
3632
+ "loss": 1.3018,
3633
+ "step": 11960
3634
+ },
3635
+ {
3636
+ "epoch": 132.22958057395144,
3637
+ "learning_rate": 8.152961672473867e-07,
3638
+ "loss": 1.3678,
3639
+ "step": 11980
3640
+ },
3641
+ {
3642
+ "epoch": 132.4503311258278,
3643
+ "learning_rate": 8.149825783972125e-07,
3644
+ "loss": 1.3626,
3645
+ "step": 12000
3646
+ },
3647
+ {
3648
+ "epoch": 132.6710816777042,
3649
+ "learning_rate": 8.146689895470383e-07,
3650
+ "loss": 1.3456,
3651
+ "step": 12020
3652
+ },
3653
+ {
3654
+ "epoch": 132.8918322295806,
3655
+ "learning_rate": 8.14355400696864e-07,
3656
+ "loss": 1.3649,
3657
+ "step": 12040
3658
+ },
3659
+ {
3660
+ "epoch": 133.11258278145695,
3661
+ "learning_rate": 8.140418118466898e-07,
3662
+ "loss": 1.3615,
3663
+ "step": 12060
3664
+ },
3665
+ {
3666
+ "epoch": 133.33333333333334,
3667
+ "learning_rate": 8.137282229965157e-07,
3668
+ "loss": 1.3447,
3669
+ "step": 12080
3670
+ },
3671
+ {
3672
+ "epoch": 133.5540838852097,
3673
+ "learning_rate": 8.134146341463414e-07,
3674
+ "loss": 1.3378,
3675
+ "step": 12100
3676
+ },
3677
+ {
3678
+ "epoch": 133.7748344370861,
3679
+ "learning_rate": 8.131010452961672e-07,
3680
+ "loss": 1.2747,
3681
+ "step": 12120
3682
+ },
3683
+ {
3684
+ "epoch": 133.99558498896246,
3685
+ "learning_rate": 8.12787456445993e-07,
3686
+ "loss": 1.3212,
3687
+ "step": 12140
3688
+ },
3689
+ {
3690
+ "epoch": 134.21633554083886,
3691
+ "learning_rate": 8.124738675958189e-07,
3692
+ "loss": 1.274,
3693
+ "step": 12160
3694
+ },
3695
+ {
3696
+ "epoch": 134.43708609271522,
3697
+ "learning_rate": 8.121602787456445e-07,
3698
+ "loss": 1.3424,
3699
+ "step": 12180
3700
+ },
3701
+ {
3702
+ "epoch": 134.6578366445916,
3703
+ "learning_rate": 8.118466898954703e-07,
3704
+ "loss": 1.3688,
3705
+ "step": 12200
3706
+ },
3707
+ {
3708
+ "epoch": 134.878587196468,
3709
+ "learning_rate": 8.115331010452961e-07,
3710
+ "loss": 1.2944,
3711
+ "step": 12220
3712
+ },
3713
+ {
3714
+ "epoch": 135.09933774834437,
3715
+ "learning_rate": 8.11219512195122e-07,
3716
+ "loss": 1.4009,
3717
+ "step": 12240
3718
+ },
3719
+ {
3720
+ "epoch": 135.32008830022076,
3721
+ "learning_rate": 8.109059233449476e-07,
3722
+ "loss": 1.3217,
3723
+ "step": 12260
3724
+ },
3725
+ {
3726
+ "epoch": 135.54083885209712,
3727
+ "learning_rate": 8.105923344947735e-07,
3728
+ "loss": 1.3039,
3729
+ "step": 12280
3730
+ },
3731
+ {
3732
+ "epoch": 135.76158940397352,
3733
+ "learning_rate": 8.102787456445994e-07,
3734
+ "loss": 1.3223,
3735
+ "step": 12300
3736
+ },
3737
+ {
3738
+ "epoch": 135.98233995584988,
3739
+ "learning_rate": 8.099651567944251e-07,
3740
+ "loss": 1.3649,
3741
+ "step": 12320
3742
+ },
3743
+ {
3744
+ "epoch": 136.20309050772627,
3745
+ "learning_rate": 8.096515679442508e-07,
3746
+ "loss": 1.3254,
3747
+ "step": 12340
3748
+ },
3749
+ {
3750
+ "epoch": 136.42384105960264,
3751
+ "learning_rate": 8.093379790940766e-07,
3752
+ "loss": 1.3474,
3753
+ "step": 12360
3754
+ },
3755
+ {
3756
+ "epoch": 136.64459161147903,
3757
+ "learning_rate": 8.090243902439025e-07,
3758
+ "loss": 1.3528,
3759
+ "step": 12380
3760
+ },
3761
+ {
3762
+ "epoch": 136.86534216335542,
3763
+ "learning_rate": 8.087108013937281e-07,
3764
+ "loss": 1.3654,
3765
+ "step": 12400
3766
+ },
3767
+ {
3768
+ "epoch": 137.08609271523179,
3769
+ "learning_rate": 8.08397212543554e-07,
3770
+ "loss": 1.3761,
3771
+ "step": 12420
3772
+ },
3773
+ {
3774
+ "epoch": 137.30684326710818,
3775
+ "learning_rate": 8.080836236933798e-07,
3776
+ "loss": 1.3623,
3777
+ "step": 12440
3778
+ },
3779
+ {
3780
+ "epoch": 137.52759381898454,
3781
+ "learning_rate": 8.077700348432056e-07,
3782
+ "loss": 1.322,
3783
+ "step": 12460
3784
+ },
3785
+ {
3786
+ "epoch": 137.74834437086093,
3787
+ "learning_rate": 8.074564459930312e-07,
3788
+ "loss": 1.3221,
3789
+ "step": 12480
3790
+ },
3791
+ {
3792
+ "epoch": 137.9690949227373,
3793
+ "learning_rate": 8.071428571428571e-07,
3794
+ "loss": 1.3333,
3795
+ "step": 12500
3796
+ },
3797
+ {
3798
+ "epoch": 138.1898454746137,
3799
+ "learning_rate": 8.068292682926829e-07,
3800
+ "loss": 1.3036,
3801
+ "step": 12520
3802
+ },
3803
+ {
3804
+ "epoch": 138.41059602649005,
3805
+ "learning_rate": 8.065156794425087e-07,
3806
+ "loss": 1.3236,
3807
+ "step": 12540
3808
+ },
3809
+ {
3810
+ "epoch": 138.63134657836645,
3811
+ "learning_rate": 8.062020905923344e-07,
3812
+ "loss": 1.2922,
3813
+ "step": 12560
3814
+ },
3815
+ {
3816
+ "epoch": 138.85209713024284,
3817
+ "learning_rate": 8.058885017421603e-07,
3818
+ "loss": 1.3773,
3819
+ "step": 12580
3820
+ },
3821
+ {
3822
+ "epoch": 139.0728476821192,
3823
+ "learning_rate": 8.055749128919861e-07,
3824
+ "loss": 1.3378,
3825
+ "step": 12600
3826
+ },
3827
+ {
3828
+ "epoch": 139.2935982339956,
3829
+ "learning_rate": 8.052613240418117e-07,
3830
+ "loss": 1.3817,
3831
+ "step": 12620
3832
+ },
3833
+ {
3834
+ "epoch": 139.51434878587196,
3835
+ "learning_rate": 8.049477351916376e-07,
3836
+ "loss": 1.3296,
3837
+ "step": 12640
3838
+ },
3839
+ {
3840
+ "epoch": 139.73509933774835,
3841
+ "learning_rate": 8.046341463414634e-07,
3842
+ "loss": 1.311,
3843
+ "step": 12660
3844
+ },
3845
+ {
3846
+ "epoch": 139.95584988962472,
3847
+ "learning_rate": 8.043205574912892e-07,
3848
+ "loss": 1.3402,
3849
+ "step": 12680
3850
+ },
3851
+ {
3852
+ "epoch": 140.1766004415011,
3853
+ "learning_rate": 8.040069686411149e-07,
3854
+ "loss": 1.3487,
3855
+ "step": 12700
3856
+ },
3857
+ {
3858
+ "epoch": 140.39735099337747,
3859
+ "learning_rate": 8.036933797909408e-07,
3860
+ "loss": 1.3242,
3861
+ "step": 12720
3862
+ },
3863
+ {
3864
+ "epoch": 140.61810154525386,
3865
+ "learning_rate": 8.033797909407665e-07,
3866
+ "loss": 1.3486,
3867
+ "step": 12740
3868
+ },
3869
+ {
3870
+ "epoch": 140.83885209713026,
3871
+ "learning_rate": 8.030662020905923e-07,
3872
+ "loss": 1.3231,
3873
+ "step": 12760
3874
+ },
3875
+ {
3876
+ "epoch": 141.05960264900662,
3877
+ "learning_rate": 8.02752613240418e-07,
3878
+ "loss": 1.3466,
3879
+ "step": 12780
3880
+ },
3881
+ {
3882
+ "epoch": 141.280353200883,
3883
+ "learning_rate": 8.024390243902439e-07,
3884
+ "loss": 1.3044,
3885
+ "step": 12800
3886
+ },
3887
+ {
3888
+ "epoch": 141.50110375275938,
3889
+ "learning_rate": 8.021254355400696e-07,
3890
+ "loss": 1.3127,
3891
+ "step": 12820
3892
+ },
3893
+ {
3894
+ "epoch": 141.72185430463577,
3895
+ "learning_rate": 8.018118466898954e-07,
3896
+ "loss": 1.3426,
3897
+ "step": 12840
3898
+ },
3899
+ {
3900
+ "epoch": 141.94260485651213,
3901
+ "learning_rate": 8.014982578397213e-07,
3902
+ "loss": 1.3484,
3903
+ "step": 12860
3904
+ },
3905
+ {
3906
+ "epoch": 142.16335540838853,
3907
+ "learning_rate": 8.011846689895469e-07,
3908
+ "loss": 1.3492,
3909
+ "step": 12880
3910
+ },
3911
+ {
3912
+ "epoch": 142.3841059602649,
3913
+ "learning_rate": 8.008710801393729e-07,
3914
+ "loss": 1.3381,
3915
+ "step": 12900
3916
+ },
3917
+ {
3918
+ "epoch": 142.60485651214128,
3919
+ "learning_rate": 8.005574912891985e-07,
3920
+ "loss": 1.3326,
3921
+ "step": 12920
3922
+ },
3923
+ {
3924
+ "epoch": 142.82560706401765,
3925
+ "learning_rate": 8.002439024390244e-07,
3926
+ "loss": 1.3415,
3927
+ "step": 12940
3928
+ },
3929
+ {
3930
+ "epoch": 143.04635761589404,
3931
+ "learning_rate": 7.999303135888501e-07,
3932
+ "loss": 1.3218,
3933
+ "step": 12960
3934
+ },
3935
+ {
3936
+ "epoch": 143.26710816777043,
3937
+ "learning_rate": 7.99616724738676e-07,
3938
+ "loss": 1.3619,
3939
+ "step": 12980
3940
+ },
3941
+ {
3942
+ "epoch": 143.4878587196468,
3943
+ "learning_rate": 7.993031358885017e-07,
3944
+ "loss": 1.2859,
3945
+ "step": 13000
3946
+ },
3947
+ {
3948
+ "epoch": 143.7086092715232,
3949
+ "learning_rate": 7.989895470383275e-07,
3950
+ "loss": 1.3311,
3951
+ "step": 13020
3952
+ },
3953
+ {
3954
+ "epoch": 143.92935982339955,
3955
+ "learning_rate": 7.986759581881533e-07,
3956
+ "loss": 1.3699,
3957
+ "step": 13040
3958
+ },
3959
+ {
3960
+ "epoch": 144.15011037527594,
3961
+ "learning_rate": 7.98362369337979e-07,
3962
+ "loss": 1.3038,
3963
+ "step": 13060
3964
+ },
3965
+ {
3966
+ "epoch": 144.3708609271523,
3967
+ "learning_rate": 7.980487804878048e-07,
3968
+ "loss": 1.3118,
3969
+ "step": 13080
3970
+ },
3971
+ {
3972
+ "epoch": 144.5916114790287,
3973
+ "learning_rate": 7.977351916376306e-07,
3974
+ "loss": 1.3181,
3975
+ "step": 13100
3976
+ },
3977
+ {
3978
+ "epoch": 144.81236203090506,
3979
+ "learning_rate": 7.974216027874565e-07,
3980
+ "loss": 1.3102,
3981
+ "step": 13120
3982
+ },
3983
+ {
3984
+ "epoch": 145.03311258278146,
3985
+ "learning_rate": 7.971080139372822e-07,
3986
+ "loss": 1.3476,
3987
+ "step": 13140
3988
+ },
3989
+ {
3990
+ "epoch": 145.25386313465785,
3991
+ "learning_rate": 7.96794425087108e-07,
3992
+ "loss": 1.3301,
3993
+ "step": 13160
3994
+ },
3995
+ {
3996
+ "epoch": 145.4746136865342,
3997
+ "learning_rate": 7.964808362369338e-07,
3998
+ "loss": 1.3198,
3999
+ "step": 13180
4000
+ },
4001
+ {
4002
+ "epoch": 145.6953642384106,
4003
+ "learning_rate": 7.961672473867596e-07,
4004
+ "loss": 1.3133,
4005
+ "step": 13200
4006
+ },
4007
+ {
4008
+ "epoch": 145.91611479028697,
4009
+ "learning_rate": 7.958536585365853e-07,
4010
+ "loss": 1.3621,
4011
+ "step": 13220
4012
+ },
4013
+ {
4014
+ "epoch": 146.13686534216336,
4015
+ "learning_rate": 7.955400696864111e-07,
4016
+ "loss": 1.2893,
4017
+ "step": 13240
4018
+ },
4019
+ {
4020
+ "epoch": 146.35761589403972,
4021
+ "learning_rate": 7.952264808362369e-07,
4022
+ "loss": 1.3464,
4023
+ "step": 13260
4024
+ },
4025
+ {
4026
+ "epoch": 146.57836644591612,
4027
+ "learning_rate": 7.949128919860627e-07,
4028
+ "loss": 1.3016,
4029
+ "step": 13280
4030
+ },
4031
+ {
4032
+ "epoch": 146.79911699779248,
4033
+ "learning_rate": 7.945993031358884e-07,
4034
+ "loss": 1.3302,
4035
+ "step": 13300
4036
+ },
4037
+ {
4038
+ "epoch": 147.01986754966887,
4039
+ "learning_rate": 7.942857142857143e-07,
4040
+ "loss": 1.3465,
4041
+ "step": 13320
4042
+ },
4043
+ {
4044
+ "epoch": 147.24061810154527,
4045
+ "learning_rate": 7.9397212543554e-07,
4046
+ "loss": 1.3497,
4047
+ "step": 13340
4048
+ },
4049
+ {
4050
+ "epoch": 147.46136865342163,
4051
+ "learning_rate": 7.936585365853658e-07,
4052
+ "loss": 1.335,
4053
+ "step": 13360
4054
+ },
4055
+ {
4056
+ "epoch": 147.68211920529802,
4057
+ "learning_rate": 7.933449477351915e-07,
4058
+ "loss": 1.312,
4059
+ "step": 13380
4060
+ },
4061
+ {
4062
+ "epoch": 147.9028697571744,
4063
+ "learning_rate": 7.930313588850174e-07,
4064
+ "loss": 1.3215,
4065
+ "step": 13400
4066
+ },
4067
+ {
4068
+ "epoch": 148.12362030905078,
4069
+ "learning_rate": 7.927177700348433e-07,
4070
+ "loss": 1.3434,
4071
+ "step": 13420
4072
+ },
4073
+ {
4074
+ "epoch": 148.34437086092714,
4075
+ "learning_rate": 7.924041811846689e-07,
4076
+ "loss": 1.3217,
4077
+ "step": 13440
4078
+ },
4079
+ {
4080
+ "epoch": 148.56512141280353,
4081
+ "learning_rate": 7.920905923344948e-07,
4082
+ "loss": 1.3013,
4083
+ "step": 13460
4084
+ },
4085
+ {
4086
+ "epoch": 148.7858719646799,
4087
+ "learning_rate": 7.917770034843205e-07,
4088
+ "loss": 1.2845,
4089
+ "step": 13480
4090
+ },
4091
+ {
4092
+ "epoch": 149.0066225165563,
4093
+ "learning_rate": 7.914634146341463e-07,
4094
+ "loss": 1.3128,
4095
+ "step": 13500
4096
+ },
4097
+ {
4098
+ "epoch": 149.22737306843268,
4099
+ "learning_rate": 7.91149825783972e-07,
4100
+ "loss": 1.3168,
4101
+ "step": 13520
4102
+ },
4103
+ {
4104
+ "epoch": 149.44812362030905,
4105
+ "learning_rate": 7.908362369337979e-07,
4106
+ "loss": 1.2723,
4107
+ "step": 13540
4108
+ },
4109
+ {
4110
+ "epoch": 149.66887417218544,
4111
+ "learning_rate": 7.905226480836238e-07,
4112
+ "loss": 1.3252,
4113
+ "step": 13560
4114
+ },
4115
+ {
4116
+ "epoch": 149.8896247240618,
4117
+ "learning_rate": 7.902090592334494e-07,
4118
+ "loss": 1.3171,
4119
+ "step": 13580
4120
+ },
4121
+ {
4122
+ "epoch": 150.1103752759382,
4123
+ "learning_rate": 7.898954703832752e-07,
4124
+ "loss": 1.2999,
4125
+ "step": 13600
4126
+ },
4127
+ {
4128
+ "epoch": 150.33112582781456,
4129
+ "learning_rate": 7.89581881533101e-07,
4130
+ "loss": 1.3204,
4131
+ "step": 13620
4132
+ },
4133
+ {
4134
+ "epoch": 150.55187637969095,
4135
+ "learning_rate": 7.892682926829268e-07,
4136
+ "loss": 1.3109,
4137
+ "step": 13640
4138
+ },
4139
+ {
4140
+ "epoch": 150.77262693156732,
4141
+ "learning_rate": 7.889547038327525e-07,
4142
+ "loss": 1.2844,
4143
+ "step": 13660
4144
+ },
4145
+ {
4146
+ "epoch": 150.9933774834437,
4147
+ "learning_rate": 7.886411149825784e-07,
4148
+ "loss": 1.3289,
4149
+ "step": 13680
4150
+ },
4151
+ {
4152
+ "epoch": 151.2141280353201,
4153
+ "learning_rate": 7.883275261324042e-07,
4154
+ "loss": 1.2994,
4155
+ "step": 13700
4156
+ },
4157
+ {
4158
+ "epoch": 151.43487858719647,
4159
+ "learning_rate": 7.8801393728223e-07,
4160
+ "loss": 1.3577,
4161
+ "step": 13720
4162
+ },
4163
+ {
4164
+ "epoch": 151.65562913907286,
4165
+ "learning_rate": 7.877003484320557e-07,
4166
+ "loss": 1.3082,
4167
+ "step": 13740
4168
+ },
4169
+ {
4170
+ "epoch": 151.87637969094922,
4171
+ "learning_rate": 7.873867595818815e-07,
4172
+ "loss": 1.327,
4173
+ "step": 13760
4174
+ },
4175
+ {
4176
+ "epoch": 152.0971302428256,
4177
+ "learning_rate": 7.870731707317073e-07,
4178
+ "loss": 1.2884,
4179
+ "step": 13780
4180
+ },
4181
+ {
4182
+ "epoch": 152.31788079470198,
4183
+ "learning_rate": 7.86759581881533e-07,
4184
+ "loss": 1.3056,
4185
+ "step": 13800
4186
+ },
4187
+ {
4188
+ "epoch": 152.53863134657837,
4189
+ "learning_rate": 7.864459930313588e-07,
4190
+ "loss": 1.3033,
4191
+ "step": 13820
4192
+ },
4193
+ {
4194
+ "epoch": 152.75938189845473,
4195
+ "learning_rate": 7.861324041811847e-07,
4196
+ "loss": 1.3124,
4197
+ "step": 13840
4198
+ },
4199
+ {
4200
+ "epoch": 152.98013245033113,
4201
+ "learning_rate": 7.858188153310104e-07,
4202
+ "loss": 1.3514,
4203
+ "step": 13860
4204
+ },
4205
+ {
4206
+ "epoch": 153.20088300220752,
4207
+ "learning_rate": 7.855052264808362e-07,
4208
+ "loss": 1.3502,
4209
+ "step": 13880
4210
+ },
4211
+ {
4212
+ "epoch": 153.42163355408388,
4213
+ "learning_rate": 7.851916376306619e-07,
4214
+ "loss": 1.3594,
4215
+ "step": 13900
4216
+ },
4217
+ {
4218
+ "epoch": 153.64238410596028,
4219
+ "learning_rate": 7.848780487804878e-07,
4220
+ "loss": 1.3061,
4221
+ "step": 13920
4222
+ },
4223
+ {
4224
+ "epoch": 153.86313465783664,
4225
+ "learning_rate": 7.845644599303136e-07,
4226
+ "loss": 1.2751,
4227
+ "step": 13940
4228
+ },
4229
+ {
4230
+ "epoch": 154.08388520971303,
4231
+ "learning_rate": 7.842508710801393e-07,
4232
+ "loss": 1.3334,
4233
+ "step": 13960
4234
+ },
4235
+ {
4236
+ "epoch": 154.3046357615894,
4237
+ "learning_rate": 7.839372822299652e-07,
4238
+ "loss": 1.337,
4239
+ "step": 13980
4240
+ },
4241
+ {
4242
+ "epoch": 154.5253863134658,
4243
+ "learning_rate": 7.836236933797909e-07,
4244
+ "loss": 1.3272,
4245
+ "step": 14000
4246
+ },
4247
+ {
4248
+ "epoch": 154.74613686534215,
4249
+ "learning_rate": 7.833101045296167e-07,
4250
+ "loss": 1.3093,
4251
+ "step": 14020
4252
+ },
4253
+ {
4254
+ "epoch": 154.96688741721854,
4255
+ "learning_rate": 7.829965156794425e-07,
4256
+ "loss": 1.2967,
4257
+ "step": 14040
4258
+ },
4259
+ {
4260
+ "epoch": 155.18763796909494,
4261
+ "learning_rate": 7.826829268292683e-07,
4262
+ "loss": 1.3127,
4263
+ "step": 14060
4264
+ },
4265
+ {
4266
+ "epoch": 155.4083885209713,
4267
+ "learning_rate": 7.82369337979094e-07,
4268
+ "loss": 1.3198,
4269
+ "step": 14080
4270
+ },
4271
+ {
4272
+ "epoch": 155.6291390728477,
4273
+ "learning_rate": 7.820557491289198e-07,
4274
+ "loss": 1.2706,
4275
+ "step": 14100
4276
+ },
4277
+ {
4278
+ "epoch": 155.84988962472406,
4279
+ "learning_rate": 7.817421602787456e-07,
4280
+ "loss": 1.3346,
4281
+ "step": 14120
4282
+ },
4283
+ {
4284
+ "epoch": 156.07064017660045,
4285
+ "learning_rate": 7.814285714285714e-07,
4286
+ "loss": 1.3161,
4287
+ "step": 14140
4288
+ },
4289
+ {
4290
+ "epoch": 156.2913907284768,
4291
+ "learning_rate": 7.811149825783972e-07,
4292
+ "loss": 1.3083,
4293
+ "step": 14160
4294
+ },
4295
+ {
4296
+ "epoch": 156.5121412803532,
4297
+ "learning_rate": 7.80801393728223e-07,
4298
+ "loss": 1.3371,
4299
+ "step": 14180
4300
+ },
4301
+ {
4302
+ "epoch": 156.73289183222957,
4303
+ "learning_rate": 7.804878048780488e-07,
4304
+ "loss": 1.3168,
4305
+ "step": 14200
4306
+ },
4307
+ {
4308
+ "epoch": 156.95364238410596,
4309
+ "learning_rate": 7.801742160278745e-07,
4310
+ "loss": 1.3037,
4311
+ "step": 14220
4312
+ },
4313
+ {
4314
+ "epoch": 157.17439293598235,
4315
+ "learning_rate": 7.798606271777003e-07,
4316
+ "loss": 1.2958,
4317
+ "step": 14240
4318
+ },
4319
+ {
4320
+ "epoch": 157.39514348785872,
4321
+ "learning_rate": 7.795470383275261e-07,
4322
+ "loss": 1.3027,
4323
+ "step": 14260
4324
+ },
4325
+ {
4326
+ "epoch": 157.6158940397351,
4327
+ "learning_rate": 7.792334494773519e-07,
4328
+ "loss": 1.2728,
4329
+ "step": 14280
4330
+ },
4331
+ {
4332
+ "epoch": 157.83664459161147,
4333
+ "learning_rate": 7.789198606271777e-07,
4334
+ "loss": 1.3138,
4335
+ "step": 14300
4336
+ },
4337
+ {
4338
+ "epoch": 158.05739514348787,
4339
+ "learning_rate": 7.786062717770035e-07,
4340
+ "loss": 1.3205,
4341
+ "step": 14320
4342
+ },
4343
+ {
4344
+ "epoch": 158.27814569536423,
4345
+ "learning_rate": 7.782926829268292e-07,
4346
+ "loss": 1.308,
4347
+ "step": 14340
4348
+ },
4349
+ {
4350
+ "epoch": 158.49889624724062,
4351
+ "learning_rate": 7.77979094076655e-07,
4352
+ "loss": 1.2679,
4353
+ "step": 14360
4354
+ },
4355
+ {
4356
+ "epoch": 158.719646799117,
4357
+ "learning_rate": 7.776655052264809e-07,
4358
+ "loss": 1.3194,
4359
+ "step": 14380
4360
+ },
4361
+ {
4362
+ "epoch": 158.94039735099338,
4363
+ "learning_rate": 7.773519163763066e-07,
4364
+ "loss": 1.3278,
4365
+ "step": 14400
4366
+ },
4367
+ {
4368
+ "epoch": 158.94039735099338,
4369
+ "eval_bleu": 52.1498,
4370
+ "eval_gen_len": 8.5,
4371
+ "eval_loss": 1.9856631755828857,
4372
+ "eval_runtime": 3.1482,
4373
+ "eval_samples_per_second": 9.529,
4374
+ "eval_steps_per_second": 1.906,
4375
+ "step": 14400
4376
  }
4377
  ],
4378
  "logging_steps": 20,
 
@@ -3302,7 +4392,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.194864182891315e+16,
+ "total_flos": 4.258388618983834e+16,
  "train_batch_size": 5,
  "trial_name": null,
  "trial_params": null