Training in progress, step 1400, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1400/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1400/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1400/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step1400/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1c7ec080c49b4ded7d9add721716bdf06187fb00c39ff223f5982466d466c72
|
3 |
size 29034840
|
last-checkpoint/global_step1400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8ee78757c87123bc8871369b24b5db68971f91924c7db6fa0b8f2846bfddb2f
|
3 |
+
size 43429616
|
last-checkpoint/global_step1400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6b09c73d734710dcb2492433d7692deb088dde6c5eb1352333f8bec424636bc
|
3 |
+
size 43429616
|
last-checkpoint/global_step1400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:366e924c74d9b076360c87c0401c98bc225d173c1a2e270cc5336c3ebfa88f9d
|
3 |
+
size 43429616
|
last-checkpoint/global_step1400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c02fbd817bfe5eb184c64d4b1dc62d9d79c1f4f6c5c418ab69f62ce35cc287b
|
3 |
+
size 43429616
|
last-checkpoint/global_step1400/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1906adf1b95058095138f9b57795e6e0826a899c9c7e5a525c8eae36ecaa15f4
|
3 |
+
size 637299
|
last-checkpoint/global_step1400/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a0bb562939d112dd834c43097bc2d1ef141d083d45e4138023cb41209e8c874
|
3 |
+
size 637171
|
last-checkpoint/global_step1400/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3eb2bc0e5fa21a633543ff99a5a9ea2924b55031a219c0558ebd60e5ccef7d0e
|
3 |
+
size 637171
|
last-checkpoint/global_step1400/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:336e83dddf7ea2bb6e2ee90e33a484acff5b821f73e635c4aaaa44a24086519e
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1400
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe66a68e61de2221b30fd9749bc68b45a1474bb2cc95901bca9557ac87909355
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cd4f3162e46c3bb0f1fc4d3c52c7c33e60f56764458e0c8a73c3810b0a25f8c
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:185cc99aaa81b1b49b3ddc74aa6f97aa3036330983a7b69d52bd191057f9a5d5
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e37403c30cb4309e54e5defdde1906486716fc859274035d44aaac5d48a97ba
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba18a87eab7efca964506a3a18bdf9452eae09db2b3c77bd3a82db3283b5abc0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.4339977502822876,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_detect_scale4/lora/sft/checkpoint-1250",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2410,11 +2410,100 @@
|
|
2410 |
"eval_steps_per_second": 0.782,
|
2411 |
"num_input_tokens_seen": 14173240,
|
2412 |
"step": 1350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2413 |
}
|
2414 |
],
|
2415 |
"logging_steps": 5,
|
2416 |
"max_steps": 3400,
|
2417 |
-
"num_input_tokens_seen":
|
2418 |
"num_train_epochs": 1,
|
2419 |
"save_steps": 50,
|
2420 |
"stateful_callbacks": {
|
@@ -2429,7 +2518,7 @@
|
|
2429 |
"attributes": {}
|
2430 |
}
|
2431 |
},
|
2432 |
-
"total_flos":
|
2433 |
"train_batch_size": 1,
|
2434 |
"trial_name": null,
|
2435 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.4339977502822876,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_detect_scale4/lora/sft/checkpoint-1250",
|
4 |
+
"epoch": 0.3605459696111254,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 1400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2410 |
"eval_steps_per_second": 0.782,
|
2411 |
"num_input_tokens_seen": 14173240,
|
2412 |
"step": 1350
|
2413 |
+
},
|
2414 |
+
{
|
2415 |
+
"epoch": 0.3489569920164821,
|
2416 |
+
"grad_norm": 6.100571377010892,
|
2417 |
+
"learning_rate": 7.030720984447279e-05,
|
2418 |
+
"loss": 0.41,
|
2419 |
+
"num_input_tokens_seen": 14226032,
|
2420 |
+
"step": 1355
|
2421 |
+
},
|
2422 |
+
{
|
2423 |
+
"epoch": 0.3502446561936647,
|
2424 |
+
"grad_norm": 3.531812694789996,
|
2425 |
+
"learning_rate": 7.008477123264848e-05,
|
2426 |
+
"loss": 0.3751,
|
2427 |
+
"num_input_tokens_seen": 14278128,
|
2428 |
+
"step": 1360
|
2429 |
+
},
|
2430 |
+
{
|
2431 |
+
"epoch": 0.3515323203708473,
|
2432 |
+
"grad_norm": 13.528736327050117,
|
2433 |
+
"learning_rate": 6.986185761302224e-05,
|
2434 |
+
"loss": 0.4814,
|
2435 |
+
"num_input_tokens_seen": 14330624,
|
2436 |
+
"step": 1365
|
2437 |
+
},
|
2438 |
+
{
|
2439 |
+
"epoch": 0.3528199845480299,
|
2440 |
+
"grad_norm": 6.2453361475565305,
|
2441 |
+
"learning_rate": 6.963847425753403e-05,
|
2442 |
+
"loss": 0.5007,
|
2443 |
+
"num_input_tokens_seen": 14382416,
|
2444 |
+
"step": 1370
|
2445 |
+
},
|
2446 |
+
{
|
2447 |
+
"epoch": 0.35410764872521244,
|
2448 |
+
"grad_norm": 3.5868157849734925,
|
2449 |
+
"learning_rate": 6.941462644923318e-05,
|
2450 |
+
"loss": 0.4335,
|
2451 |
+
"num_input_tokens_seen": 14434896,
|
2452 |
+
"step": 1375
|
2453 |
+
},
|
2454 |
+
{
|
2455 |
+
"epoch": 0.35539531290239507,
|
2456 |
+
"grad_norm": 7.0930284762784925,
|
2457 |
+
"learning_rate": 6.919031948215335e-05,
|
2458 |
+
"loss": 0.4427,
|
2459 |
+
"num_input_tokens_seen": 14487152,
|
2460 |
+
"step": 1380
|
2461 |
+
},
|
2462 |
+
{
|
2463 |
+
"epoch": 0.35668297707957763,
|
2464 |
+
"grad_norm": 1.8673746248959853,
|
2465 |
+
"learning_rate": 6.896555866118741e-05,
|
2466 |
+
"loss": 0.42,
|
2467 |
+
"num_input_tokens_seen": 14539608,
|
2468 |
+
"step": 1385
|
2469 |
+
},
|
2470 |
+
{
|
2471 |
+
"epoch": 0.35797064125676026,
|
2472 |
+
"grad_norm": 3.29378340171418,
|
2473 |
+
"learning_rate": 6.87403493019619e-05,
|
2474 |
+
"loss": 0.4573,
|
2475 |
+
"num_input_tokens_seen": 14592168,
|
2476 |
+
"step": 1390
|
2477 |
+
},
|
2478 |
+
{
|
2479 |
+
"epoch": 0.3592583054339428,
|
2480 |
+
"grad_norm": 4.710051493913417,
|
2481 |
+
"learning_rate": 6.851469673071143e-05,
|
2482 |
+
"loss": 0.4341,
|
2483 |
+
"num_input_tokens_seen": 14643920,
|
2484 |
+
"step": 1395
|
2485 |
+
},
|
2486 |
+
{
|
2487 |
+
"epoch": 0.3605459696111254,
|
2488 |
+
"grad_norm": 5.46737560287727,
|
2489 |
+
"learning_rate": 6.828860628415253e-05,
|
2490 |
+
"loss": 0.437,
|
2491 |
+
"num_input_tokens_seen": 14697136,
|
2492 |
+
"step": 1400
|
2493 |
+
},
|
2494 |
+
{
|
2495 |
+
"epoch": 0.3605459696111254,
|
2496 |
+
"eval_loss": 0.46620962023735046,
|
2497 |
+
"eval_runtime": 38.4197,
|
2498 |
+
"eval_samples_per_second": 3.123,
|
2499 |
+
"eval_steps_per_second": 0.781,
|
2500 |
+
"num_input_tokens_seen": 14697136,
|
2501 |
+
"step": 1400
|
2502 |
}
|
2503 |
],
|
2504 |
"logging_steps": 5,
|
2505 |
"max_steps": 3400,
|
2506 |
+
"num_input_tokens_seen": 14697136,
|
2507 |
"num_train_epochs": 1,
|
2508 |
"save_steps": 50,
|
2509 |
"stateful_callbacks": {
|
|
|
2518 |
"attributes": {}
|
2519 |
}
|
2520 |
},
|
2521 |
+
"total_flos": 969656994627584.0,
|
2522 |
"train_batch_size": 1,
|
2523 |
"trial_name": null,
|
2524 |
"trial_params": null
|