plip commited on
Commit
f982b48
1 Parent(s): 367ccde

Training in progress, step 380000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8884578ecff20bdc143758e28e0d85c34dad8dbfa37132c1e5318c8f80a9a1be
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0bfce1b1773be96f0d3fe813d2ed231bcc619aa9d86090dabfebda938938679
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:797df8ba5787bd4a5fbf872ca4b3fe3fdb3db1dba64902a8c55adee164838ae0
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd79899bdba4b0a42b6695b21b791ea0a1c2b1e7f7e8aeb0419ab9a5bb16c136
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa9c03b0f188995dd79f725ff6a9978a0d337dceeebc68b1530033452b02bc6
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa9c03b0f188995dd79f725ff6a9978a0d337dceeebc68b1530033452b02bc6
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa9c03b0f188995dd79f725ff6a9978a0d337dceeebc68b1530033452b02bc6
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa9c03b0f188995dd79f725ff6a9978a0d337dceeebc68b1530033452b02bc6
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa9c03b0f188995dd79f725ff6a9978a0d337dceeebc68b1530033452b02bc6
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa9c03b0f188995dd79f725ff6a9978a0d337dceeebc68b1530033452b02bc6
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa9c03b0f188995dd79f725ff6a9978a0d337dceeebc68b1530033452b02bc6
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc84ada216c0172437f14147bb53f92637e055287492931833e7c516a6d9eed3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa9c03b0f188995dd79f725ff6a9978a0d337dceeebc68b1530033452b02bc6
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:580594f4768ecd1bd92e87dca92e874365a397305161c1f8781b79f1f0b613ba
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2047b5f47fc3de929bb0738f7fbdd248300ab063f6fd4eddcabc29f5482852
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.429634537947909,
5
- "global_step": 370000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7406,11 +7406,211 @@
7406
  "eval_samples_per_second": 766.44,
7407
  "eval_steps_per_second": 12.263,
7408
  "step": 370000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7409
  }
7410
  ],
7411
  "max_steps": 500000,
7412
  "num_train_epochs": 13,
7413
- "total_flos": 1.1820902824439095e+22,
7414
  "trial_name": null,
7415
  "trial_params": null
7416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.684489525460013,
5
+ "global_step": 380000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7406
  "eval_samples_per_second": 766.44,
7407
  "eval_steps_per_second": 12.263,
7408
  "step": 370000
7409
+ },
7410
+ {
7411
+ "epoch": 9.44,
7412
+ "learning_rate": 6.0012253629189544e-05,
7413
+ "loss": 0.2753,
7414
+ "step": 370500
7415
+ },
7416
+ {
7417
+ "epoch": 9.46,
7418
+ "learning_rate": 5.965048145015944e-05,
7419
+ "loss": 0.2745,
7420
+ "step": 371000
7421
+ },
7422
+ {
7423
+ "epoch": 9.46,
7424
+ "eval_loss": 0.807996928691864,
7425
+ "eval_runtime": 1.2961,
7426
+ "eval_samples_per_second": 771.545,
7427
+ "eval_steps_per_second": 12.345,
7428
+ "step": 371000
7429
+ },
7430
+ {
7431
+ "epoch": 9.47,
7432
+ "learning_rate": 5.928975199818785e-05,
7433
+ "loss": 0.2744,
7434
+ "step": 371500
7435
+ },
7436
+ {
7437
+ "epoch": 9.48,
7438
+ "learning_rate": 5.893006921815428e-05,
7439
+ "loss": 0.2745,
7440
+ "step": 372000
7441
+ },
7442
+ {
7443
+ "epoch": 9.48,
7444
+ "eval_loss": 0.8102282881736755,
7445
+ "eval_runtime": 1.3228,
7446
+ "eval_samples_per_second": 755.978,
7447
+ "eval_steps_per_second": 12.096,
7448
+ "step": 372000
7449
+ },
7450
+ {
7451
+ "epoch": 9.49,
7452
+ "learning_rate": 5.857143704349198e-05,
7453
+ "loss": 0.2749,
7454
+ "step": 372500
7455
+ },
7456
+ {
7457
+ "epoch": 9.51,
7458
+ "learning_rate": 5.8213859396144986e-05,
7459
+ "loss": 0.2747,
7460
+ "step": 373000
7461
+ },
7462
+ {
7463
+ "epoch": 9.51,
7464
+ "eval_loss": 0.8059311509132385,
7465
+ "eval_runtime": 1.3365,
7466
+ "eval_samples_per_second": 748.226,
7467
+ "eval_steps_per_second": 11.972,
7468
+ "step": 373000
7469
+ },
7470
+ {
7471
+ "epoch": 9.52,
7472
+ "learning_rate": 5.785734018652507e-05,
7473
+ "loss": 0.2743,
7474
+ "step": 373500
7475
+ },
7476
+ {
7477
+ "epoch": 9.53,
7478
+ "learning_rate": 5.750188331346927e-05,
7479
+ "loss": 0.2742,
7480
+ "step": 374000
7481
+ },
7482
+ {
7483
+ "epoch": 9.53,
7484
+ "eval_loss": 0.8084880113601685,
7485
+ "eval_runtime": 1.3112,
7486
+ "eval_samples_per_second": 762.675,
7487
+ "eval_steps_per_second": 12.203,
7488
+ "step": 374000
7489
+ },
7490
+ {
7491
+ "epoch": 9.54,
7492
+ "learning_rate": 5.714749266419695e-05,
7493
+ "loss": 0.2741,
7494
+ "step": 374500
7495
+ },
7496
+ {
7497
+ "epoch": 9.56,
7498
+ "learning_rate": 5.6794172114267566e-05,
7499
+ "loss": 0.2742,
7500
+ "step": 375000
7501
+ },
7502
+ {
7503
+ "epoch": 9.56,
7504
+ "eval_loss": 0.8031001091003418,
7505
+ "eval_runtime": 1.2768,
7506
+ "eval_samples_per_second": 783.2,
7507
+ "eval_steps_per_second": 12.531,
7508
+ "step": 375000
7509
+ },
7510
+ {
7511
+ "epoch": 9.57,
7512
+ "learning_rate": 5.6441925527537914e-05,
7513
+ "loss": 0.2738,
7514
+ "step": 375500
7515
+ },
7516
+ {
7517
+ "epoch": 9.58,
7518
+ "learning_rate": 5.60907567561203e-05,
7519
+ "loss": 0.274,
7520
+ "step": 376000
7521
+ },
7522
+ {
7523
+ "epoch": 9.58,
7524
+ "eval_loss": 0.8067184090614319,
7525
+ "eval_runtime": 1.3299,
7526
+ "eval_samples_per_second": 751.95,
7527
+ "eval_steps_per_second": 12.031,
7528
+ "step": 376000
7529
+ },
7530
+ {
7531
+ "epoch": 9.6,
7532
+ "learning_rate": 5.574066964034012e-05,
7533
+ "loss": 0.274,
7534
+ "step": 376500
7535
+ },
7536
+ {
7537
+ "epoch": 9.61,
7538
+ "learning_rate": 5.539166800869402e-05,
7539
+ "loss": 0.274,
7540
+ "step": 377000
7541
+ },
7542
+ {
7543
+ "epoch": 9.61,
7544
+ "eval_loss": 0.8057398200035095,
7545
+ "eval_runtime": 1.2808,
7546
+ "eval_samples_per_second": 780.764,
7547
+ "eval_steps_per_second": 12.492,
7548
+ "step": 377000
7549
+ },
7550
+ {
7551
+ "epoch": 9.62,
7552
+ "learning_rate": 5.5043755677807955e-05,
7553
+ "loss": 0.2738,
7554
+ "step": 377500
7555
+ },
7556
+ {
7557
+ "epoch": 9.63,
7558
+ "learning_rate": 5.4696936452395344e-05,
7559
+ "loss": 0.274,
7560
+ "step": 378000
7561
+ },
7562
+ {
7563
+ "epoch": 9.63,
7564
+ "eval_loss": 0.8031483292579651,
7565
+ "eval_runtime": 1.2827,
7566
+ "eval_samples_per_second": 779.634,
7567
+ "eval_steps_per_second": 12.474,
7568
+ "step": 378000
7569
+ },
7570
+ {
7571
+ "epoch": 9.65,
7572
+ "learning_rate": 5.435121412521576e-05,
7573
+ "loss": 0.2738,
7574
+ "step": 378500
7575
+ },
7576
+ {
7577
+ "epoch": 9.66,
7578
+ "learning_rate": 5.400659247703307e-05,
7579
+ "loss": 0.2738,
7580
+ "step": 379000
7581
+ },
7582
+ {
7583
+ "epoch": 9.66,
7584
+ "eval_loss": 0.8066932559013367,
7585
+ "eval_runtime": 1.3559,
7586
+ "eval_samples_per_second": 737.531,
7587
+ "eval_steps_per_second": 11.8,
7588
+ "step": 379000
7589
+ },
7590
+ {
7591
+ "epoch": 9.67,
7592
+ "learning_rate": 5.36630752765745e-05,
7593
+ "loss": 0.2739,
7594
+ "step": 379500
7595
+ },
7596
+ {
7597
+ "epoch": 9.68,
7598
+ "learning_rate": 5.3320666280489146e-05,
7599
+ "loss": 0.2737,
7600
+ "step": 380000
7601
+ },
7602
+ {
7603
+ "epoch": 9.68,
7604
+ "eval_loss": 0.8089754581451416,
7605
+ "eval_runtime": 1.319,
7606
+ "eval_samples_per_second": 758.149,
7607
+ "eval_steps_per_second": 12.13,
7608
+ "step": 380000
7609
  }
7610
  ],
7611
  "max_steps": 500000,
7612
  "num_train_epochs": 13,
7613
+ "total_flos": 1.2140389963213595e+22,
7614
  "trial_name": null,
7615
  "trial_params": null
7616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:797df8ba5787bd4a5fbf872ca4b3fe3fdb3db1dba64902a8c55adee164838ae0
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd79899bdba4b0a42b6695b21b791ea0a1c2b1e7f7e8aeb0419ab9a5bb16c136
3
  size 102501541