plip commited on
Commit
03d2158
1 Parent(s): 9156c9d

Training in progress, step 130000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0147a34f11e7c1f08c8f5a1a7b664f7685f6024ff50f7e310c340458a7ae47df
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b61f0fcd50875833fb0e1f9ce51cdf7d430e60b619e76669b19c533bc334c88
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6fe95a3571e9df68816ac46ccf405f0fd55b7d69662da0acabe531bd8d6e1a7
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c23cb20328d6cd580ce88942d789c9a02552a95a268d89326d508421abbe4a
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13dfeb18522f460b3bcc4a8a207781907714f968fa9018d9e319a8e4cb1a2731
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e29d5733f7ec570f71140fb29faf2306a82ee45c27c9eb9c785e5d73fa8382
3
+ size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c93d998395352ee5e4e32f058b4acd76a3b9750cf1106a383769865ab27f4f83
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dabc5ed16f76e0d1a8c7f2b2c2a3619addb4d0c3eca0a88fa6e7b0ba2beea5dd
3
+ size 14439
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf0bf23f928568c7a3122050cfa99355815fe8c7f93429bc6536f64596e3cd0c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7531b762a4f77385b5cf4054c1324ef1475bf3e0305baf6834591cd7a2e6b325
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ea81d3c492be115b8468d1cd1e3438b2c19fcafe21dc467d287c05e9211418f
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95971fab031e1bc0e074238eeb9bb97d0dce735f44e80bc1ac009c7c3e11e0ef
3
+ size 14439
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ccdbb1080343e6446b837638f679019013e2908bdad70d4dd13ba9861cbc74c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5759d26021494b175082e8aa472aecf989df6cf2e73cc13099dc5f04e8a3a7
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b675771b8e40ada562deddfe20382c77a27fda6482714c03a215febc190e3d96
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:437af33cac768d06b94a7ff819e2415c2d01a6cb933eea2964edd87e0a74d2a3
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6547f6037906559622b01a609d68d147aca5926d35d56c61c1cb91881c5b9c16
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f46bc5befc0dd2669aebf64aa0216b1a5da79235f03660685ed5378a375aacc
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e726e73c63cfe3e121582d5f26c95cee1a06ebdb69c1f3b859e1406e64a4d59
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d615ff7d950e30ec30974f2cf78479d745d92e39a0b3ba0bec5c1a7fb2cdeb
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60ddf5c33f60196860027b40e8443017b8ed479cc5dfb73f8a076fe07f546fd4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1c3de3c2d66025eca4b56b8d5efacb781ba1e9b4daae40acd9c92c0f7cbe98c
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.676583228652839,
5
- "global_step": 120000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2406,11 +2406,211 @@
2406
  "eval_samples_per_second": 1855.209,
2407
  "eval_steps_per_second": 29.683,
2408
  "step": 120000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2409
  }
2410
  ],
2411
  "max_steps": 500000,
2412
  "num_train_epochs": 16,
2413
- "total_flos": 3.8338217014380724e+21,
2414
  "trial_name": null,
2415
  "trial_params": null
2416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.9829651643739084,
5
+ "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2406
  "eval_samples_per_second": 1855.209,
2407
  "eval_steps_per_second": 29.683,
2408
  "step": 120000
2409
+ },
2410
+ {
2411
+ "epoch": 3.69,
2412
+ "learning_rate": 0.0002720249765698123,
2413
+ "loss": 0.3661,
2414
+ "step": 120500
2415
+ },
2416
+ {
2417
+ "epoch": 3.71,
2418
+ "learning_rate": 0.0002717412091888626,
2419
+ "loss": 0.3663,
2420
+ "step": 121000
2421
+ },
2422
+ {
2423
+ "epoch": 3.71,
2424
+ "eval_loss": 0.7859072089195251,
2425
+ "eval_runtime": 0.5189,
2426
+ "eval_samples_per_second": 1927.188,
2427
+ "eval_steps_per_second": 30.835,
2428
+ "step": 121000
2429
+ },
2430
+ {
2431
+ "epoch": 3.72,
2432
+ "learning_rate": 0.00027145616514475274,
2433
+ "loss": 0.3656,
2434
+ "step": 121500
2435
+ },
2436
+ {
2437
+ "epoch": 3.74,
2438
+ "learning_rate": 0.0002711698475546788,
2439
+ "loss": 0.3656,
2440
+ "step": 122000
2441
+ },
2442
+ {
2443
+ "epoch": 3.74,
2444
+ "eval_loss": 0.7800880074501038,
2445
+ "eval_runtime": 0.5426,
2446
+ "eval_samples_per_second": 1842.936,
2447
+ "eval_steps_per_second": 29.487,
2448
+ "step": 122000
2449
+ },
2450
+ {
2451
+ "epoch": 3.75,
2452
+ "learning_rate": 0.00027088225954976407,
2453
+ "loss": 0.3653,
2454
+ "step": 122500
2455
+ },
2456
+ {
2457
+ "epoch": 3.77,
2458
+ "learning_rate": 0.0002705934042750249,
2459
+ "loss": 0.3653,
2460
+ "step": 123000
2461
+ },
2462
+ {
2463
+ "epoch": 3.77,
2464
+ "eval_loss": 0.7793842554092407,
2465
+ "eval_runtime": 0.5318,
2466
+ "eval_samples_per_second": 1880.356,
2467
+ "eval_steps_per_second": 30.086,
2468
+ "step": 123000
2469
+ },
2470
+ {
2471
+ "epoch": 3.78,
2472
+ "learning_rate": 0.00027030328488933625,
2473
+ "loss": 0.3649,
2474
+ "step": 123500
2475
+ },
2476
+ {
2477
+ "epoch": 3.8,
2478
+ "learning_rate": 0.00027001190456539726,
2479
+ "loss": 0.3648,
2480
+ "step": 124000
2481
+ },
2482
+ {
2483
+ "epoch": 3.8,
2484
+ "eval_loss": 0.7881133556365967,
2485
+ "eval_runtime": 0.507,
2486
+ "eval_samples_per_second": 1972.529,
2487
+ "eval_steps_per_second": 31.56,
2488
+ "step": 124000
2489
+ },
2490
+ {
2491
+ "epoch": 3.81,
2492
+ "learning_rate": 0.0002697192664896965,
2493
+ "loss": 0.3648,
2494
+ "step": 124500
2495
+ },
2496
+ {
2497
+ "epoch": 3.83,
2498
+ "learning_rate": 0.00026942537386247706,
2499
+ "loss": 0.3644,
2500
+ "step": 125000
2501
+ },
2502
+ {
2503
+ "epoch": 3.83,
2504
+ "eval_loss": 0.7862609624862671,
2505
+ "eval_runtime": 0.5236,
2506
+ "eval_samples_per_second": 1909.846,
2507
+ "eval_steps_per_second": 30.558,
2508
+ "step": 125000
2509
+ },
2510
+ {
2511
+ "epoch": 3.85,
2512
+ "learning_rate": 0.0002691302298977016,
2513
+ "loss": 0.3643,
2514
+ "step": 125500
2515
+ },
2516
+ {
2517
+ "epoch": 3.86,
2518
+ "learning_rate": 0.0002688338378230173,
2519
+ "loss": 0.3642,
2520
+ "step": 126000
2521
+ },
2522
+ {
2523
+ "epoch": 3.86,
2524
+ "eval_loss": 0.7819104194641113,
2525
+ "eval_runtime": 0.5098,
2526
+ "eval_samples_per_second": 1961.492,
2527
+ "eval_steps_per_second": 31.384,
2528
+ "step": 126000
2529
+ },
2530
+ {
2531
+ "epoch": 3.88,
2532
+ "learning_rate": 0.00026853620087972035,
2533
+ "loss": 0.3636,
2534
+ "step": 126500
2535
+ },
2536
+ {
2537
+ "epoch": 3.89,
2538
+ "learning_rate": 0.00026823732232272065,
2539
+ "loss": 0.3638,
2540
+ "step": 127000
2541
+ },
2542
+ {
2543
+ "epoch": 3.89,
2544
+ "eval_loss": 0.7886289358139038,
2545
+ "eval_runtime": 0.5488,
2546
+ "eval_samples_per_second": 1822.211,
2547
+ "eval_steps_per_second": 29.155,
2548
+ "step": 127000
2549
+ },
2550
+ {
2551
+ "epoch": 3.91,
2552
+ "learning_rate": 0.0002679372054205063,
2553
+ "loss": 0.3637,
2554
+ "step": 127500
2555
+ },
2556
+ {
2557
+ "epoch": 3.92,
2558
+ "learning_rate": 0.0002676358534551076,
2559
+ "loss": 0.3637,
2560
+ "step": 128000
2561
+ },
2562
+ {
2563
+ "epoch": 3.92,
2564
+ "eval_loss": 0.7856259942054749,
2565
+ "eval_runtime": 0.5448,
2566
+ "eval_samples_per_second": 1835.504,
2567
+ "eval_steps_per_second": 29.368,
2568
+ "step": 128000
2569
+ },
2570
+ {
2571
+ "epoch": 3.94,
2572
+ "learning_rate": 0.00026733326972206133,
2573
+ "loss": 0.3631,
2574
+ "step": 128500
2575
+ },
2576
+ {
2577
+ "epoch": 3.95,
2578
+ "learning_rate": 0.0002670294575303748,
2579
+ "loss": 0.3631,
2580
+ "step": 129000
2581
+ },
2582
+ {
2583
+ "epoch": 3.95,
2584
+ "eval_loss": 0.7774640917778015,
2585
+ "eval_runtime": 0.5451,
2586
+ "eval_samples_per_second": 1834.446,
2587
+ "eval_steps_per_second": 29.351,
2588
+ "step": 129000
2589
+ },
2590
+ {
2591
+ "epoch": 3.97,
2592
+ "learning_rate": 0.0002667244202024894,
2593
+ "loss": 0.3629,
2594
+ "step": 129500
2595
+ },
2596
+ {
2597
+ "epoch": 3.98,
2598
+ "learning_rate": 0.00026641816107424453,
2599
+ "loss": 0.363,
2600
+ "step": 130000
2601
+ },
2602
+ {
2603
+ "epoch": 3.98,
2604
+ "eval_loss": 0.7824729084968567,
2605
+ "eval_runtime": 0.5158,
2606
+ "eval_samples_per_second": 1938.903,
2607
+ "eval_steps_per_second": 31.022,
2608
+ "step": 130000
2609
  }
2610
  ],
2611
  "max_steps": 500000,
2612
  "num_train_epochs": 16,
2613
+ "total_flos": 4.1533088402125724e+21,
2614
  "trial_name": null,
2615
  "trial_params": null
2616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6fe95a3571e9df68816ac46ccf405f0fd55b7d69662da0acabe531bd8d6e1a7
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c23cb20328d6cd580ce88942d789c9a02552a95a268d89326d508421abbe4a
3
  size 102501541