ben81828 committed
Commit 042187c · verified · 1 Parent(s): 7786a68

Training in progress, step 1400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3ce6daf3c20e1ee39c6a4135462748b3119e8e74d43e226b9b4b7810797d2c9
+oid sha256:a1c7ec080c49b4ded7d9add721716bdf06187fb00c39ff223f5982466d466c72
 size 29034840
last-checkpoint/global_step1400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8ee78757c87123bc8871369b24b5db68971f91924c7db6fa0b8f2846bfddb2f
+size 43429616
last-checkpoint/global_step1400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6b09c73d734710dcb2492433d7692deb088dde6c5eb1352333f8bec424636bc
+size 43429616
last-checkpoint/global_step1400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:366e924c74d9b076360c87c0401c98bc225d173c1a2e270cc5336c3ebfa88f9d
+size 43429616
last-checkpoint/global_step1400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c02fbd817bfe5eb184c64d4b1dc62d9d79c1f4f6c5c418ab69f62ce35cc287b
+size 43429616
last-checkpoint/global_step1400/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1906adf1b95058095138f9b57795e6e0826a899c9c7e5a525c8eae36ecaa15f4
+size 637299
last-checkpoint/global_step1400/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0bb562939d112dd834c43097bc2d1ef141d083d45e4138023cb41209e8c874
+size 637171
last-checkpoint/global_step1400/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3eb2bc0e5fa21a633543ff99a5a9ea2924b55031a219c0558ebd60e5ccef7d0e
+size 637171
last-checkpoint/global_step1400/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:336e83dddf7ea2bb6e2ee90e33a484acff5b821f73e635c4aaaa44a24086519e
+size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step1350
+global_step1400
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9747fe881253e52a47314f48068ef9649032bec4cb284b1b4becbb8787f37faa
+oid sha256:fe66a68e61de2221b30fd9749bc68b45a1474bb2cc95901bca9557ac87909355
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ac02a5554a5ef9e3473dcd2926626ae41f4777354859c7d2bf0a0c1188c0583
+oid sha256:0cd4f3162e46c3bb0f1fc4d3c52c7c33e60f56764458e0c8a73c3810b0a25f8c
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9405d230cc78dac3f3b2ab887674631c15f66fedab0042ab7bc1bd83b8575344
+oid sha256:185cc99aaa81b1b49b3ddc74aa6f97aa3036330983a7b69d52bd191057f9a5d5
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b15237547030ac62d49d70a5465b2e29515e6334f62416eb16c0c6d073f7c6bf
+oid sha256:0e37403c30cb4309e54e5defdde1906486716fc859274035d44aaac5d48a97ba
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca5b091f7e9fe97f38f540a5a26edd08efe79a114c6bc8ff4d4c0d9acb5b20f2
+oid sha256:ba18a87eab7efca964506a3a18bdf9452eae09db2b3c77bd3a82db3283b5abc0
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.4339977502822876,
   "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_detect_scale4/lora/sft/checkpoint-1250",
-  "epoch": 0.3476693278392995,
+  "epoch": 0.3605459696111254,
   "eval_steps": 50,
-  "global_step": 1350,
+  "global_step": 1400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2410,11 +2410,100 @@
       "eval_steps_per_second": 0.782,
       "num_input_tokens_seen": 14173240,
       "step": 1350
+    },
+    {
+      "epoch": 0.3489569920164821,
+      "grad_norm": 6.100571377010892,
+      "learning_rate": 7.030720984447279e-05,
+      "loss": 0.41,
+      "num_input_tokens_seen": 14226032,
+      "step": 1355
+    },
+    {
+      "epoch": 0.3502446561936647,
+      "grad_norm": 3.531812694789996,
+      "learning_rate": 7.008477123264848e-05,
+      "loss": 0.3751,
+      "num_input_tokens_seen": 14278128,
+      "step": 1360
+    },
+    {
+      "epoch": 0.3515323203708473,
+      "grad_norm": 13.528736327050117,
+      "learning_rate": 6.986185761302224e-05,
+      "loss": 0.4814,
+      "num_input_tokens_seen": 14330624,
+      "step": 1365
+    },
+    {
+      "epoch": 0.3528199845480299,
+      "grad_norm": 6.2453361475565305,
+      "learning_rate": 6.963847425753403e-05,
+      "loss": 0.5007,
+      "num_input_tokens_seen": 14382416,
+      "step": 1370
+    },
+    {
+      "epoch": 0.35410764872521244,
+      "grad_norm": 3.5868157849734925,
+      "learning_rate": 6.941462644923318e-05,
+      "loss": 0.4335,
+      "num_input_tokens_seen": 14434896,
+      "step": 1375
+    },
+    {
+      "epoch": 0.35539531290239507,
+      "grad_norm": 7.0930284762784925,
+      "learning_rate": 6.919031948215335e-05,
+      "loss": 0.4427,
+      "num_input_tokens_seen": 14487152,
+      "step": 1380
+    },
+    {
+      "epoch": 0.35668297707957763,
+      "grad_norm": 1.8673746248959853,
+      "learning_rate": 6.896555866118741e-05,
+      "loss": 0.42,
+      "num_input_tokens_seen": 14539608,
+      "step": 1385
+    },
+    {
+      "epoch": 0.35797064125676026,
+      "grad_norm": 3.29378340171418,
+      "learning_rate": 6.87403493019619e-05,
+      "loss": 0.4573,
+      "num_input_tokens_seen": 14592168,
+      "step": 1390
+    },
+    {
+      "epoch": 0.3592583054339428,
+      "grad_norm": 4.710051493913417,
+      "learning_rate": 6.851469673071143e-05,
+      "loss": 0.4341,
+      "num_input_tokens_seen": 14643920,
+      "step": 1395
+    },
+    {
+      "epoch": 0.3605459696111254,
+      "grad_norm": 5.46737560287727,
+      "learning_rate": 6.828860628415253e-05,
+      "loss": 0.437,
+      "num_input_tokens_seen": 14697136,
+      "step": 1400
+    },
+    {
+      "epoch": 0.3605459696111254,
+      "eval_loss": 0.46620962023735046,
+      "eval_runtime": 38.4197,
+      "eval_samples_per_second": 3.123,
+      "eval_steps_per_second": 0.781,
+      "num_input_tokens_seen": 14697136,
+      "step": 1400
     }
   ],
   "logging_steps": 5,
   "max_steps": 3400,
-  "num_input_tokens_seen": 14173240,
+  "num_input_tokens_seen": 14697136,
   "num_train_epochs": 1,
   "save_steps": 50,
   "stateful_callbacks": {
@@ -2429,7 +2518,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 935103008079872.0,
+  "total_flos": 969656994627584.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null