ben81828 committed (verified)
Commit c9f0589 · 1 Parent(s): 896b922

Training in progress, step 1600, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1764f403b39c14f233fd2d6443f410cab81d66e3cc9f1d476e6e55a6642ff67
+oid sha256:af1a6642ef8e6fc99c76eecc34b9716928e3cb77cbad5191e812f07407a40f6e
 size 29034840
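
The adapter file above is a Git LFS pointer to a ~29 MB LoRA adapter, not a full model. A minimal sketch of applying it for inference follows; it assumes the base model is Qwen2-VL (the save path "CADICA_qwenvl_..." in trainer_state.json suggests this, but the exact base id is not recorded in this diff) and that last-checkpoint/ has been downloaded locally.

```python
# Rough sketch, not the repo's own code. Assumed: Qwen2-VL base model id and a
# local copy of the last-checkpoint/ directory; substitute the real base if it differs.
import torch
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from peft import PeftModel

base_id = "Qwen/Qwen2-VL-7B-Instruct"  # assumption: actual base model not stated in this commit
base = Qwen2VLForConditionalGeneration.from_pretrained(base_id, torch_dtype=torch.bfloat16)
processor = AutoProcessor.from_pretrained(base_id)

# adapter_model.safetensors holds only the LoRA weights, so it is applied on top
# of the base; merge_and_unload() folds the adapter into the base for inference.
model = PeftModel.from_pretrained(base, "last-checkpoint")
model = model.merge_and_unload()
```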
last-checkpoint/global_step1600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a1e0a99f40993045f70a7d1f668c31acb92eab13df1356fdd6a4e2f8531aee8
+size 43429616
last-checkpoint/global_step1600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8c5f93b6cb2ecb145e33480d04275c956be11e05c85b5f894fa05e8e1d26794
+size 43429616
last-checkpoint/global_step1600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:923bcefce8398e6986fbcdc09e8c8428d3b1795008063f23d6f71e3bcc41b61c
+size 43429616
last-checkpoint/global_step1600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90c591777dd0823c282439ceb0015389baa53ac2730917ec7f0a8e22fc101365
+size 43429616
last-checkpoint/global_step1600/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d4afb4aaf2fe09b00f009636090ac8140a92a92061b0beb56860dbb1563a322
+size 637299
last-checkpoint/global_step1600/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99752500f7623dfbbb0b804e1c22e1cbd188635e5967f6ff430fde200bbb865f
+size 637171
last-checkpoint/global_step1600/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf3c8ffb4a388a7c685af82e29e62c38dac41b19f00d659db4227bfe73f04b08
+size 637171
last-checkpoint/global_step1600/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bab2075b924174a0c53aaeb6926f645d6f84f031f1c7a5a84a1e25330a267f11
+size 637171
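
The global_step1600/ files added above are DeepSpeed ZeRO shards: one bf16 optimizer-state partition and one model-state file per rank, for a four-rank (4-GPU) run. A minimal sketch of consolidating them back into a single fp32 state dict with DeepSpeed's zero_to_fp32 helper, assuming DeepSpeed is installed and the full last-checkpoint/ directory (including the "latest" tag file) is available locally:

```python
# Sketch only: merge the ZeRO-partitioned checkpoint into one fp32 state dict.
# Assumes last-checkpoint/ (with global_step1600/ and the "latest" file) is local.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# Reads the tag from last-checkpoint/latest (here: global_step1600) and merges
# the per-rank partitions into a single CPU state dict.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint")
print(f"{len(state_dict)} tensors, {sum(v.numel() for v in state_dict.values()):,} parameters")
```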
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step1550
+global_step1600
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f1e48a120d69830576f7b582aa6cc46f0ca41d30015a7a674eaec3dcdfc0f09
+oid sha256:9279ed4b01716237e789d2631c1f29bc5d43c5633c014d4401de21b672c1b355
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4dbabb9273d3983e52a4a981b5f60f8c2e19da375765d05bb9f2caad284b9652
+oid sha256:ca1990d68e57c70df5c56d395dd3f3befbe07b380521f4144677c20f6fe2a3eb
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:554ac925bb9c9ea292b7a41caac1cf75285511cf8aa440f37090891ee457a178
+oid sha256:e0790066885525e1b9a9390a40ae27abd57abb47f031abface27890732f9e684
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5be5e00123fc0a321e41599b50e07be02f4c165504c601192e5c73f5f5437c30
+oid sha256:1325a2034fe48ebad4f00ac8a2b32ab5c4c43c2497712169a8e3b1112363d916
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8eb069683e7f84aa36296476346fc663361d9b05ad7b09b71f22f44afdb0ea48
+oid sha256:f2487a6c511ed8055eb0842d87966b09ae8b62c1b4514727282ca413d6e9c4e2
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.4339977502822876,
   "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_detect_scale4/lora/sft/checkpoint-1250",
-  "epoch": 0.39917589492660316,
+  "epoch": 0.41205253669842906,
   "eval_steps": 50,
-  "global_step": 1550,
+  "global_step": 1600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2766,11 +2766,100 @@
       "eval_steps_per_second": 0.78,
       "num_input_tokens_seen": 16269896,
       "step": 1550
+    },
+    {
+      "epoch": 0.4004635591037857,
+      "grad_norm": 4.920079251827062,
+      "learning_rate": 6.109217817775139e-05,
+      "loss": 0.4593,
+      "num_input_tokens_seen": 16322496,
+      "step": 1555
+    },
+    {
+      "epoch": 0.40175122328096835,
+      "grad_norm": 9.068094163618136,
+      "learning_rate": 6.085494954896156e-05,
+      "loss": 0.4865,
+      "num_input_tokens_seen": 16375320,
+      "step": 1560
+    },
+    {
+      "epoch": 0.4030388874581509,
+      "grad_norm": 9.316944070527988,
+      "learning_rate": 6.061746419901388e-05,
+      "loss": 0.4422,
+      "num_input_tokens_seen": 16428096,
+      "step": 1565
+    },
+    {
+      "epoch": 0.4043265516353335,
+      "grad_norm": 2.4617418860122213,
+      "learning_rate": 6.0379727744471936e-05,
+      "loss": 0.3538,
+      "num_input_tokens_seen": 16480832,
+      "step": 1570
+    },
+    {
+      "epoch": 0.4056142158125161,
+      "grad_norm": 5.028400110331736,
+      "learning_rate": 6.014174580783794e-05,
+      "loss": 0.3923,
+      "num_input_tokens_seen": 16534016,
+      "step": 1575
+    },
+    {
+      "epoch": 0.4069018799896987,
+      "grad_norm": 6.638266454273257,
+      "learning_rate": 5.990352401741981e-05,
+      "loss": 0.3967,
+      "num_input_tokens_seen": 16586216,
+      "step": 1580
+    },
+    {
+      "epoch": 0.4081895441668813,
+      "grad_norm": 6.928848680437489,
+      "learning_rate": 5.9665068007197976e-05,
+      "loss": 0.4212,
+      "num_input_tokens_seen": 16639312,
+      "step": 1585
+    },
+    {
+      "epoch": 0.40947720834406387,
+      "grad_norm": 4.2324092477507005,
+      "learning_rate": 5.94263834166923e-05,
+      "loss": 0.3489,
+      "num_input_tokens_seen": 16692328,
+      "step": 1590
+    },
+    {
+      "epoch": 0.41076487252124644,
+      "grad_norm": 5.607976113391715,
+      "learning_rate": 5.918747589082853e-05,
+      "loss": 0.4105,
+      "num_input_tokens_seen": 16745088,
+      "step": 1595
+    },
+    {
+      "epoch": 0.41205253669842906,
+      "grad_norm": 5.155332109104381,
+      "learning_rate": 5.8948351079804875e-05,
+      "loss": 0.3914,
+      "num_input_tokens_seen": 16798768,
+      "step": 1600
+    },
+    {
+      "epoch": 0.41205253669842906,
+      "eval_loss": 0.4657597243785858,
+      "eval_runtime": 38.2951,
+      "eval_samples_per_second": 3.134,
+      "eval_steps_per_second": 0.783,
+      "num_input_tokens_seen": 16798768,
+      "step": 1600
     }
   ],
   "logging_steps": 5,
   "max_steps": 3400,
-  "num_input_tokens_seen": 16269896,
+  "num_input_tokens_seen": 16798768,
   "num_train_epochs": 1,
   "save_steps": 50,
   "stateful_callbacks": {
@@ -2785,7 +2874,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1073418423500800.0,
+  "total_flos": 1108323298967552.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null