plip commited on
Commit
29d6add
1 Parent(s): 17d96ba

Training in progress, step 280000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:185444010f2414af8f6b292f79fa769076772990bf1219a1dafd09b6faae29a4
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:685baa1115c9996a6d3f73f4d770cb44301323ea5ee9d7eb0866dbaffb748894
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94365aa60c35fc7ec52e5e8ba19311622aaab34d18bf7ce3dd77401881509519
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:866ffe9cab4eb4a4754c64cb5d7ed4f31ed0ad83e08ecaffbb7f5035db70963f
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6f6ae6faade50eb043968b5667df29128b1e4a2530013bd32474d5a36afc850
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c627f38132dd40cef0053712aa1270dae63d12ac97e7fc6aaa3050230ece5f
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cc3f7b88227092a0043ff66b55085eb9bd377bb70cc891cc293b48f870db21f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ff5c53b911e025186f3d4cefd2ae203cd3c867a78b103c8c95c51e8d20b5b99
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f3a5a051f0b0618eb7cb1692de034b0ac1fd365c0c181b09a598798b6235801
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba38210b26270fe6dc17e8687ed18c17cb3c73130918a06af7f287737355cbb7
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9c6b8fbeaadc53fb4ee209c1d104938ceaf8c8a8cbc2fa87ebcabcc6284da17
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:088007f9c38eed502aa04c5f433e4da63387bd973584b94625ca26c1cb52eacf
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d753925d97c5520e660dcdd16394471ced5c4bc24193ed0a377ef70d8717a46
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2014d323a6a226b752e2eff97f028102e8f8d7d7ca11f19c3f47a58b4e4654ab
3
  size 14439
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdee02ae130781f905df4745f98c06bad459194317ec411b25af7a96f282fee2
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:539ae7fa3d9912913294dc2719a2dd73be6f83eb9b37e29816e50a7b5d35e566
3
+ size 14439
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:823a83d9a98cde647a58d74cc1bea63c670933d602a5a07e7fc2bfa68d3b9e32
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75cd206de91cc2704e6a99c57b9e488ddcdbbc4b410b702c69de32467cde6e75
3
+ size 14439
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4ef1c7732ec4132391d0d6a205bc292fbd5fe79d85d00447b48ac3c30b01e18
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c28314a75be0e567739244bb3713c7239bcefe683fa1af29178e1fe22d80967d
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cb61348726887e329b19406ea4e3e39ac391edeec6dfd8508b3cb524aa33e28
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80db35d9a6af0da10cd7cebe3bf01f3932293e48fc6a8607555efe3b5591e844
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.272312264468887,
5
- "global_step": 270000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5406,11 +5406,211 @@
5406
  "eval_samples_per_second": 1913.356,
5407
  "eval_steps_per_second": 30.614,
5408
  "step": 270000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5409
  }
5410
  ],
5411
  "max_steps": 500000,
5412
  "num_train_epochs": 16,
5413
- "total_flos": 8.626088843295693e+21,
5414
  "trial_name": null,
5415
  "trial_params": null
5416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.578694200189958,
5
+ "global_step": 280000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5406
  "eval_samples_per_second": 1913.356,
5407
  "eval_steps_per_second": 30.614,
5408
  "step": 270000
5409
+ },
5410
+ {
5411
+ "epoch": 8.29,
5412
+ "learning_rate": 0.00014733147931802578,
5413
+ "loss": 0.7368,
5414
+ "step": 270500
5415
+ },
5416
+ {
5417
+ "epoch": 8.3,
5418
+ "learning_rate": 0.00014685268692538238,
5419
+ "loss": 0.7368,
5420
+ "step": 271000
5421
+ },
5422
+ {
5423
+ "epoch": 8.3,
5424
+ "eval_loss": 0.9023635387420654,
5425
+ "eval_runtime": 0.5188,
5426
+ "eval_samples_per_second": 1927.356,
5427
+ "eval_steps_per_second": 30.838,
5428
+ "step": 271000
5429
+ },
5430
+ {
5431
+ "epoch": 8.32,
5432
+ "learning_rate": 0.00014637398363044946,
5433
+ "loss": 0.7143,
5434
+ "step": 271500
5435
+ },
5436
+ {
5437
+ "epoch": 8.33,
5438
+ "learning_rate": 0.00014589537466824955,
5439
+ "loss": 0.3428,
5440
+ "step": 272000
5441
+ },
5442
+ {
5443
+ "epoch": 8.33,
5444
+ "eval_loss": 0.7834916710853577,
5445
+ "eval_runtime": 0.5348,
5446
+ "eval_samples_per_second": 1869.839,
5447
+ "eval_steps_per_second": 29.917,
5448
+ "step": 272000
5449
+ },
5450
+ {
5451
+ "epoch": 8.35,
5452
+ "learning_rate": 0.00014541686527277356,
5453
+ "loss": 0.3388,
5454
+ "step": 272500
5455
+ },
5456
+ {
5457
+ "epoch": 8.36,
5458
+ "learning_rate": 0.00014493846067692358,
5459
+ "loss": 0.3376,
5460
+ "step": 273000
5461
+ },
5462
+ {
5463
+ "epoch": 8.36,
5464
+ "eval_loss": 0.7843596935272217,
5465
+ "eval_runtime": 0.5178,
5466
+ "eval_samples_per_second": 1931.2,
5467
+ "eval_steps_per_second": 30.899,
5468
+ "step": 273000
5469
+ },
5470
+ {
5471
+ "epoch": 8.38,
5472
+ "learning_rate": 0.00014446016611245567,
5473
+ "loss": 0.3362,
5474
+ "step": 273500
5475
+ },
5476
+ {
5477
+ "epoch": 8.39,
5478
+ "learning_rate": 0.00014398198680992252,
5479
+ "loss": 0.3369,
5480
+ "step": 274000
5481
+ },
5482
+ {
5483
+ "epoch": 8.39,
5484
+ "eval_loss": 0.7844694256782532,
5485
+ "eval_runtime": 0.5316,
5486
+ "eval_samples_per_second": 1881.272,
5487
+ "eval_steps_per_second": 30.1,
5488
+ "step": 274000
5489
+ },
5490
+ {
5491
+ "epoch": 8.41,
5492
+ "learning_rate": 0.00014350392799861636,
5493
+ "loss": 0.336,
5494
+ "step": 274500
5495
+ },
5496
+ {
5497
+ "epoch": 8.43,
5498
+ "learning_rate": 0.0001430259949065118,
5499
+ "loss": 0.3356,
5500
+ "step": 275000
5501
+ },
5502
+ {
5503
+ "epoch": 8.43,
5504
+ "eval_loss": 0.7838680148124695,
5505
+ "eval_runtime": 0.52,
5506
+ "eval_samples_per_second": 1923.025,
5507
+ "eval_steps_per_second": 30.768,
5508
+ "step": 275000
5509
+ },
5510
+ {
5511
+ "epoch": 8.44,
5512
+ "learning_rate": 0.0001425481927602085,
5513
+ "loss": 0.3348,
5514
+ "step": 275500
5515
+ },
5516
+ {
5517
+ "epoch": 8.46,
5518
+ "learning_rate": 0.0001420705267848743,
5519
+ "loss": 0.3352,
5520
+ "step": 276000
5521
+ },
5522
+ {
5523
+ "epoch": 8.46,
5524
+ "eval_loss": 0.7744572162628174,
5525
+ "eval_runtime": 0.5156,
5526
+ "eval_samples_per_second": 1939.351,
5527
+ "eval_steps_per_second": 31.03,
5528
+ "step": 276000
5529
+ },
5530
+ {
5531
+ "epoch": 8.47,
5532
+ "learning_rate": 0.00014159300220418757,
5533
+ "loss": 0.3342,
5534
+ "step": 276500
5535
+ },
5536
+ {
5537
+ "epoch": 8.49,
5538
+ "learning_rate": 0.0001411156242402808,
5539
+ "loss": 0.3341,
5540
+ "step": 277000
5541
+ },
5542
+ {
5543
+ "epoch": 8.49,
5544
+ "eval_loss": 0.7838852405548096,
5545
+ "eval_runtime": 0.5192,
5546
+ "eval_samples_per_second": 1925.877,
5547
+ "eval_steps_per_second": 30.814,
5548
+ "step": 277000
5549
+ },
5550
+ {
5551
+ "epoch": 8.5,
5552
+ "learning_rate": 0.0001406383981136829,
5553
+ "loss": 0.3339,
5554
+ "step": 277500
5555
+ },
5556
+ {
5557
+ "epoch": 8.52,
5558
+ "learning_rate": 0.00014016132904326226,
5559
+ "loss": 0.334,
5560
+ "step": 278000
5561
+ },
5562
+ {
5563
+ "epoch": 8.52,
5564
+ "eval_loss": 0.7889499664306641,
5565
+ "eval_runtime": 0.5333,
5566
+ "eval_samples_per_second": 1875.279,
5567
+ "eval_steps_per_second": 30.004,
5568
+ "step": 278000
5569
+ },
5570
+ {
5571
+ "epoch": 8.53,
5572
+ "learning_rate": 0.00013968442224616989,
5573
+ "loss": 0.3338,
5574
+ "step": 278500
5575
+ },
5576
+ {
5577
+ "epoch": 8.55,
5578
+ "learning_rate": 0.00013920768293778195,
5579
+ "loss": 0.3337,
5580
+ "step": 279000
5581
+ },
5582
+ {
5583
+ "epoch": 8.55,
5584
+ "eval_loss": 0.7802003026008606,
5585
+ "eval_runtime": 0.5176,
5586
+ "eval_samples_per_second": 1931.905,
5587
+ "eval_steps_per_second": 30.91,
5588
+ "step": 279000
5589
+ },
5590
+ {
5591
+ "epoch": 8.56,
5592
+ "learning_rate": 0.00013873111633164336,
5593
+ "loss": 0.3336,
5594
+ "step": 279500
5595
+ },
5596
+ {
5597
+ "epoch": 8.58,
5598
+ "learning_rate": 0.00013825472763941,
5599
+ "loss": 0.3338,
5600
+ "step": 280000
5601
+ },
5602
+ {
5603
+ "epoch": 8.58,
5604
+ "eval_loss": 0.7855395674705505,
5605
+ "eval_runtime": 0.525,
5606
+ "eval_samples_per_second": 1904.885,
5607
+ "eval_steps_per_second": 30.478,
5608
+ "step": 280000
5609
  }
5610
  ],
5611
  "max_steps": 500000,
5612
  "num_train_epochs": 16,
5613
+ "total_flos": 8.945575982070193e+21,
5614
  "trial_name": null,
5615
  "trial_params": null
5616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94365aa60c35fc7ec52e5e8ba19311622aaab34d18bf7ce3dd77401881509519
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:866ffe9cab4eb4a4754c64cb5d7ed4f31ed0ad83e08ecaffbb7f5035db70963f
3
  size 102501541