Femboyuwu2000 committed on
Commit 610bf2f
1 Parent(s): 4558e58

Training in progress, step 7200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:59e4deb0fd7483db8ad6ecbc4b685edbd17935d1da824196ba4fc901973119e0
+ oid sha256:d33ad5ad7297a3474565ac786f8f9d70a33eb5627d08f5a697c138ffc15d88d1
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2d0a5a0084d24afc11f74c50b817d93674fb054ada62fbbef9c92566dd4897ab
+ oid sha256:6cbe5a6df51d7cf574700e32447f7c59e1f86ee74b6f42f6b5250d0259029ffa
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:379130258049e0cbeef66efed7dd213cfd228d8c04c387dcbb54754f2a453a5a
+ oid sha256:7ae43d9a0a1751b368da41b4d38a9e6b8912af0859b5bf0f0514e58b2fa49faf
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7e4378be4b8a985777dc1876169a11bcb383075ed65c6a773225043d0598af8d
+ oid sha256:c777f1142b68b8b3ec3d325314bfa50ddccfacdeb4ee5ac4c1090831572e47da
  size 1064
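
Each of the binary checkpoint files above (adapter weights, optimizer and scheduler state, RNG state) is tracked with Git LFS, so the diff only shows the three-line pointer stub: the spec version, the sha256 oid of the blob, and its size in bytes. A downloaded copy can be checked against its pointer with a short script. This is a minimal sketch, assuming the checkpoint has been pulled into a local last-checkpoint/ directory; it uses the new adapter_model.safetensors oid and size from this commit.

# Verify a downloaded LFS-tracked file against its pointer's sha256 oid and size.
# The oid and size below come from the adapter_model.safetensors pointer in this
# commit; the local path is an assumption about where the checkpoint was pulled.
import hashlib
import os

path = "last-checkpoint/adapter_model.safetensors"
expected_oid = "d33ad5ad7297a3474565ac786f8f9d70a33eb5627d08f5a697c138ffc15d88d1"
expected_size = 13982248

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print(f"{path} matches its LFS pointer")
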
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 0.5488,
+   "epoch": 0.576,
    "eval_steps": 500,
-   "global_step": 6860,
+   "global_step": 7200,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -2408,6 +2408,125 @@
        "learning_rate": 2.293046993300198e-05,
        "loss": 3.567,
        "step": 6860
+     },
+     {
+       "epoch": 0.55,
+       "grad_norm": 21.349533081054688,
+       "learning_rate": 2.288919209281294e-05,
+       "loss": 3.4752,
+       "step": 6880
+     },
+     {
+       "epoch": 0.55,
+       "grad_norm": 25.281597137451172,
+       "learning_rate": 2.284783149873195e-05,
+       "loss": 3.4934,
+       "step": 6900
+     },
+     {
+       "epoch": 0.55,
+       "grad_norm": 32.13029861450195,
+       "learning_rate": 2.2806388584612067e-05,
+       "loss": 3.4793,
+       "step": 6920
+     },
+     {
+       "epoch": 0.56,
+       "grad_norm": 21.672082901000977,
+       "learning_rate": 2.2764863785169857e-05,
+       "loss": 3.4366,
+       "step": 6940
+     },
+     {
+       "epoch": 0.56,
+       "grad_norm": 29.802305221557617,
+       "learning_rate": 2.2723257535980804e-05,
+       "loss": 3.5174,
+       "step": 6960
+     },
+     {
+       "epoch": 0.56,
+       "grad_norm": 43.310577392578125,
+       "learning_rate": 2.2681570273474783e-05,
+       "loss": 3.4745,
+       "step": 6980
+     },
+     {
+       "epoch": 0.56,
+       "grad_norm": 32.417236328125,
+       "learning_rate": 2.2639802434931447e-05,
+       "loss": 3.438,
+       "step": 7000
+     },
+     {
+       "epoch": 0.56,
+       "grad_norm": 42.29374313354492,
+       "learning_rate": 2.259795445847566e-05,
+       "loss": 3.5194,
+       "step": 7020
+     },
+     {
+       "epoch": 0.56,
+       "grad_norm": 51.19217300415039,
+       "learning_rate": 2.2556026783072896e-05,
+       "loss": 3.496,
+       "step": 7040
+     },
+     {
+       "epoch": 0.56,
+       "grad_norm": 24.400171279907227,
+       "learning_rate": 2.251401984852463e-05,
+       "loss": 3.4013,
+       "step": 7060
+     },
+     {
+       "epoch": 0.57,
+       "grad_norm": 26.298309326171875,
+       "learning_rate": 2.2471934095463724e-05,
+       "loss": 3.5607,
+       "step": 7080
+     },
+     {
+       "epoch": 0.57,
+       "grad_norm": 27.419946670532227,
+       "learning_rate": 2.2429769965349818e-05,
+       "loss": 3.4593,
+       "step": 7100
+     },
+     {
+       "epoch": 0.57,
+       "grad_norm": 29.470266342163086,
+       "learning_rate": 2.2387527900464676e-05,
+       "loss": 3.4388,
+       "step": 7120
+     },
+     {
+       "epoch": 0.57,
+       "grad_norm": 21.410829544067383,
+       "learning_rate": 2.2345208343907577e-05,
+       "loss": 3.5141,
+       "step": 7140
+     },
+     {
+       "epoch": 0.57,
+       "grad_norm": 32.501766204833984,
+       "learning_rate": 2.2302811739590642e-05,
+       "loss": 3.4647,
+       "step": 7160
+     },
+     {
+       "epoch": 0.57,
+       "grad_norm": 24.69274139404297,
+       "learning_rate": 2.2260338532234194e-05,
+       "loss": 3.4781,
+       "step": 7180
+     },
+     {
+       "epoch": 0.58,
+       "grad_norm": 25.721759796142578,
+       "learning_rate": 2.2217789167362078e-05,
+       "loss": 3.4405,
+       "step": 7200
      }
    ],
    "logging_steps": 20,
@@ -2415,7 +2534,7 @@
    "num_input_tokens_seen": 0,
    "num_train_epochs": 2,
    "save_steps": 20,
-   "total_flos": 1.6194507519492096e+16,
+   "total_flos": 1.7005830543409152e+16,
    "train_batch_size": 8,
    "trial_name": null,
    "trial_params": null