ben81828 committed
Commit e6dd391 · verified · 1 Parent(s): 2d0770b

Training in progress, step 2300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cfdde134a211d113ac15985ca670d8214e7b0ef4860c3a3926ee43121c0822b6
+oid sha256:83713538d7c41870e13178745ef172d71bfaba5ed873b0c7d6d31c872dec52a0
 size 29034840
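
Each of the files in this commit is tracked with Git LFS, so the repository stores only a small pointer (version, `oid sha256:...`, `size ...`) while the actual blob lives in LFS storage. Below is a minimal sketch for checking a downloaded blob against its pointer; the helper names and file paths are illustrative, not part of this repo.

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse a Git LFS pointer (version / oid / size lines) into a dict."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

def verify_lfs_object(pointer_path: str, blob_path: str) -> bool:
    """Return True if the blob's byte size and SHA-256 digest match the pointer."""
    pointer = parse_lfs_pointer(Path(pointer_path).read_text())
    digest = hashlib.sha256(Path(blob_path).read_bytes()).hexdigest()
    return Path(blob_path).stat().st_size == pointer["size"] and digest == pointer["oid"]

# Illustrative usage: compare a pointer shown in this diff with the resolved file.
print(verify_lfs_object("adapter_model.safetensors.pointer", "adapter_model.safetensors"))
```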
last-checkpoint/global_step2300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c480d4e96a0bd563639b1776b600fa1c047335754c66e02c46c99cffe4037311
+size 43429616
last-checkpoint/global_step2300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bc0760fe049d3fe1bd0af0fdf9a5bf355726668c6f32520fa8e32029accb34a
+size 43429616
last-checkpoint/global_step2300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e61aef05def6fd2d52ec80bdf2244163052aec271c3348bf360a7fa6458946f4
+size 43429616
last-checkpoint/global_step2300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51d3589648f3344e9d14f0b2a2e30b012d43fc0c721730a9b8ed0a02ad5e6bb9
+size 43429616
last-checkpoint/global_step2300/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62fa8b00108fdb11131034658fba7089061501022b3de6f5e243a3686a644276
+size 637299
last-checkpoint/global_step2300/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acf246967f429ff75d5cfa4a4b8804f191fdc81e48b9cd0c807a5b4a379efc21
+size 637171
last-checkpoint/global_step2300/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b5404d462155135ee507d74ac8a0754e37f4d4a952bf81ff326ba7ab6b40985
+size 637171
last-checkpoint/global_step2300/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30995616fd56844cf161de30a81dacea08f0a755be391d0f4ff601fe522ca55a
+size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step2250
+global_step2300
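
The `global_step2300/` shards follow DeepSpeed's ZeRO checkpoint layout (per-rank `bf16_*_optim_states.pt` and `zero_pp_rank_*_model_states.pt`), and the `latest` file above now selects that tag. If a single consolidated fp32 state dict is needed, DeepSpeed's `zero_to_fp32` utility can merge the shards; a sketch, assuming DeepSpeed is installed and `last-checkpoint/` has been downloaded locally:

```python
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# The tag matches the `latest` file, which now points at global_step2300.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint", tag="global_step2300")
torch.save(state_dict, "consolidated_fp32_state_dict.pt")  # illustrative output path
```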
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3187a61ccc2722c440dc24ae4a6eefe6b9e5daccf9e92473bbb4483c7751ea77
+oid sha256:1d25cbcbbaa0866ea9c7365cb49b84e805db119693e615f5a1898a6ebfe997e8
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0f2a0df922fb3337cf2562745ebe8d5adf433ca45cb4e3da33a21b48183c000
+oid sha256:a883389afac12125c2c6bf62631b7de0220fdb0020d24cd0c6e8f8858dd3b362
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4d84b5276f687f44c9af60b1e41cd7b93a6d1659e36831a7bc021b5635d663b
+oid sha256:577d49de6d60035e159d9ebb1e6eabef79a55787b14ecea93a6a93c242661779
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d69159433c88b97106cf21b92eb5a3f66f0c826aa268d82a47b3faed1ac86cd
+oid sha256:b5f2dd1c21e06806a9ce39eeab45734dfb8a62b829f91a86d1f65f13102d6242
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6909ad505e808e7099dbcdd8062e5535575cbfa3b4d3a7b7d3390e6a93ed3b49
+oid sha256:25aeb77fef385fcfd5658b6aaea11fb8aa276239b7ba19a4f3504b86f45fbcc9
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.4194311797618866,
   "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_detect_scale4/lora/sft/checkpoint-1800",
-  "epoch": 0.5794488797321659,
+  "epoch": 0.5923255215039918,
   "eval_steps": 50,
-  "global_step": 2250,
+  "global_step": 2300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4012,11 +4012,100 @@
       "eval_steps_per_second": 0.787,
       "num_input_tokens_seen": 23626952,
       "step": 2250
+    },
+    {
+      "epoch": 0.5807365439093485,
+      "grad_norm": 7.013960747751967,
+      "learning_rate": 2.7931010410023518e-05,
+      "loss": 0.3544,
+      "num_input_tokens_seen": 23680112,
+      "step": 2255
+    },
+    {
+      "epoch": 0.582024208086531,
+      "grad_norm": 4.605744038285181,
+      "learning_rate": 2.771308221117309e-05,
+      "loss": 0.3467,
+      "num_input_tokens_seen": 23731896,
+      "step": 2260
+    },
+    {
+      "epoch": 0.5833118722637136,
+      "grad_norm": 4.9921005761457895,
+      "learning_rate": 2.749568110121545e-05,
+      "loss": 0.3374,
+      "num_input_tokens_seen": 23784616,
+      "step": 2265
+    },
+    {
+      "epoch": 0.5845995364408962,
+      "grad_norm": 4.366796972583381,
+      "learning_rate": 2.7278812221718924e-05,
+      "loss": 0.3509,
+      "num_input_tokens_seen": 23836920,
+      "step": 2270
+    },
+    {
+      "epoch": 0.5858872006180788,
+      "grad_norm": 3.5862787852890876,
+      "learning_rate": 2.7062480701664488e-05,
+      "loss": 0.3014,
+      "num_input_tokens_seen": 23890792,
+      "step": 2275
+    },
+    {
+      "epoch": 0.5871748647952614,
+      "grad_norm": 7.030699453643037,
+      "learning_rate": 2.6846691657324473e-05,
+      "loss": 0.4672,
+      "num_input_tokens_seen": 23943264,
+      "step": 2280
+    },
+    {
+      "epoch": 0.588462528972444,
+      "grad_norm": 3.981559816955943,
+      "learning_rate": 2.663145019214163e-05,
+      "loss": 0.2817,
+      "num_input_tokens_seen": 23995760,
+      "step": 2285
+    },
+    {
+      "epoch": 0.5897501931496266,
+      "grad_norm": 4.699964630974459,
+      "learning_rate": 2.6416761396608362e-05,
+      "loss": 0.3538,
+      "num_input_tokens_seen": 24048696,
+      "step": 2290
+    },
+    {
+      "epoch": 0.5910378573268091,
+      "grad_norm": 8.241455514660794,
+      "learning_rate": 2.6202630348146324e-05,
+      "loss": 0.3018,
+      "num_input_tokens_seen": 24102248,
+      "step": 2295
+    },
+    {
+      "epoch": 0.5923255215039918,
+      "grad_norm": 3.2050310147654604,
+      "learning_rate": 2.598906211098643e-05,
+      "loss": 0.3322,
+      "num_input_tokens_seen": 24154624,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5923255215039918,
+      "eval_loss": 0.4960116744041443,
+      "eval_runtime": 38.142,
+      "eval_samples_per_second": 3.146,
+      "eval_steps_per_second": 0.787,
+      "num_input_tokens_seen": 24154624,
+      "step": 2300
     }
   ],
   "logging_steps": 5,
   "max_steps": 3400,
-  "num_input_tokens_seen": 23626952,
+  "num_input_tokens_seen": 24154624,
   "num_train_epochs": 1,
   "save_steps": 50,
   "stateful_callbacks": {
@@ -4031,7 +4120,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1558781764042752.0,
+  "total_flos": 1593572224925696.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null