sca255 committed
Commit d06471f
Parent: ee06330
adapter_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ec35d12dc3c5170d40944676bdda9e3b30a38f45c85b7502fef5ec94e0268fe1
+ size 8751546
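These three lines are a Git LFS pointer, not the adapter weights themselves: the actual ~8.75 MB blob is stored by content hash and fetched on checkout. A minimal sketch of reading such a pointer file, assuming the standard v1 key-value layout:

def read_lfs_pointer(path: str) -> dict:
    # Each pointer line is "key value"; keys here are version, oid, size.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields  # e.g. {"version": "...", "oid": "sha256:ec35...", "size": "8751546"}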
adapter_config.json ADDED
@@ -0,0 +1 @@
+ {"r": 32, "lora_alpha": 64, "target_modules": ["q_proj", "v_proj"], "peft_type": "LORA"}
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:458cb13d380536cf53bbe66c983fcbf6f8a0715e6483a147aadef55c51440474
+ size 8755146
config.json ADDED
@@ -0,0 +1 @@
+ {"architectures": ["Qwen2ForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 151643, "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 1536, "initializer_range": 0.02, "intermediate_size": 8960, "max_position_embeddings": 32768, "max_window_layers": 28, "model_type": "qwen2", "num_attention_heads": 12, "num_hidden_layers": 28, "num_key_value_heads": 2, "rms_norm_eps": 1e-06, "rope_theta": 1000000.0, "sliding_window": 32768, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.40.1", "use_cache": true, "use_sliding_window": false, "vocab_size": 151936}
hf_model_0001_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7bceff30bfc2ab3b22c371fd194b7f53fe6747f6eb1c20606bd614d9255c43c
+ size 3087538238
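The checkpoint size is consistent with the config above; a quick sanity check, assuming a full bf16 checkpoint at 2 bytes per parameter:

size_bytes = 3_087_538_238
approx_params = size_bytes / 2   # ~1.54e9 parameters, i.e. a ~1.5B model
print(f"~{approx_params / 1e9:.2f}B parameters")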
log_1727689637.txt ADDED
File without changes
log_1727689703.txt ADDED
@@ -0,0 +1,10 @@
+ Step 1 | loss:2.7997887134552 lr:2e-05 tokens_per_second_per_gpu:52.61478307123564
+ Step 2 | loss:1.9635694026947021 lr:2e-05 tokens_per_second_per_gpu:226.8624210889525
+ Step 3 | loss:1.9302600622177124 lr:2e-05 tokens_per_second_per_gpu:236.70222120584987
+ Step 4 | loss:2.5740036964416504 lr:2e-05 tokens_per_second_per_gpu:238.58989736307112
+ Step 5 | loss:2.0926806926727295 lr:2e-05 tokens_per_second_per_gpu:239.97332858542924
+ Step 6 | loss:2.3473567962646484 lr:2e-05 tokens_per_second_per_gpu:232.5860867484364
+ Step 7 | loss:2.1897239685058594 lr:2e-05 tokens_per_second_per_gpu:232.82439936136524
+ Step 8 | loss:1.754281759262085 lr:2e-05 tokens_per_second_per_gpu:225.55916252687996
+ Step 9 | loss:2.008831024169922 lr:2e-05 tokens_per_second_per_gpu:194.797924361662
+ Step 10 | loss:2.4670588970184326 lr:2e-05 tokens_per_second_per_gpu:223.304648360278
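Each log line carries the step number, training loss, learning rate, and per-GPU throughput. A sketch of parsing these lines back into numbers, with the format inferred from the log above:

import re

PATTERN = re.compile(
    r"Step (\d+) \| loss:([\d.]+) lr:([\d.e+-]+) "
    r"tokens_per_second_per_gpu:([\d.]+)"
)

def parse_line(line: str):
    m = PATTERN.match(line.strip())
    if m is None:
        return None
    step, loss, lr, tps = m.groups()
    return int(step), float(loss), float(lr), float(tps)

# parse_line("Step 1 | loss:2.7997887134552 lr:2e-05 tokens_per_second_per_gpu:52.61")
# -> (1, 2.7997887134552, 2e-05, 52.61)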
log_1727689796.txt ADDED
@@ -0,0 +1,10 @@
+ Step 1 | loss:2.7997887134552 lr:2e-05 tokens_per_second_per_gpu:58.93894662593801
+ Step 2 | loss:1.9635694026947021 lr:2e-05 tokens_per_second_per_gpu:206.09949463642906
+ Step 3 | loss:1.9302600622177124 lr:2e-05 tokens_per_second_per_gpu:216.92945722999465
+ Step 4 | loss:2.5740036964416504 lr:2e-05 tokens_per_second_per_gpu:221.10292263228686
+ Step 5 | loss:2.0926806926727295 lr:2e-05 tokens_per_second_per_gpu:219.83932104406693
+ Step 6 | loss:2.3473567962646484 lr:2e-05 tokens_per_second_per_gpu:213.56235556642372
+ Step 7 | loss:2.1897239685058594 lr:2e-05 tokens_per_second_per_gpu:215.91101003805272
+ Step 8 | loss:1.754281759262085 lr:2e-05 tokens_per_second_per_gpu:209.87727428872608
+ Step 9 | loss:2.008831024169922 lr:2e-05 tokens_per_second_per_gpu:184.71083655140447
+ Step 10 | loss:2.4670588970184326 lr:2e-05 tokens_per_second_per_gpu:216.0040750210692
log_1727689903.txt ADDED
@@ -0,0 +1,10 @@
+ Step 1 | loss:2.7997887134552 lr:2e-05 tokens_per_second_per_gpu:58.703458626557335
+ Step 2 | loss:1.9635694026947021 lr:2e-05 tokens_per_second_per_gpu:206.65369753685596
+ Step 3 | loss:1.9302600622177124 lr:2e-05 tokens_per_second_per_gpu:217.77590086061775
+ Step 4 | loss:2.5740036964416504 lr:2e-05 tokens_per_second_per_gpu:220.9926872703538
+ Step 5 | loss:2.0926806926727295 lr:2e-05 tokens_per_second_per_gpu:220.38571144272848
+ Step 6 | loss:2.3473567962646484 lr:2e-05 tokens_per_second_per_gpu:212.24405759850805
+ Step 7 | loss:2.1897239685058594 lr:2e-05 tokens_per_second_per_gpu:215.0095077393116
+ Step 8 | loss:1.754281759262085 lr:2e-05 tokens_per_second_per_gpu:209.23794840574274
+ Step 9 | loss:2.008831024169922 lr:2e-05 tokens_per_second_per_gpu:184.21359242959744
+ Step 10 | loss:2.4670588970184326 lr:2e-05 tokens_per_second_per_gpu:215.28094465841255
log_1727690079.txt ADDED
File without changes
log_1727690219.txt ADDED
File without changes
log_1727690260.txt ADDED
File without changes
log_1727690467.txt ADDED
File without changes
log_1727690571.txt ADDED
@@ -0,0 +1,16 @@
+ Step 1 | loss:2.575782299041748 lr:2.0000000000000002e-07 tokens_per_second_per_gpu:242.20615514588104
+ Step 2 | loss:2.7398736476898193 lr:4.0000000000000003e-07 tokens_per_second_per_gpu:206.1023089570347
+ Step 3 | loss:2.9978206157684326 lr:6.000000000000001e-07 tokens_per_second_per_gpu:221.3595594187193
+ Step 4 | loss:2.7159111499786377 lr:8.000000000000001e-07 tokens_per_second_per_gpu:216.67620533325845
+ Step 5 | loss:2.4694080352783203 lr:1.0000000000000002e-06 tokens_per_second_per_gpu:237.62258381187226
+ Step 6 | loss:2.42387318611145 lr:1.2000000000000002e-06 tokens_per_second_per_gpu:239.15001745420253
+ Step 7 | loss:2.4981305599212646 lr:1.4000000000000001e-06 tokens_per_second_per_gpu:222.76815891988795
+ Step 8 | loss:2.6238675117492676 lr:1.6000000000000001e-06 tokens_per_second_per_gpu:224.34891246994644
+ Step 9 | loss:2.8125081062316895 lr:1.8000000000000001e-06 tokens_per_second_per_gpu:185.2454162476401
+ Step 10 | loss:2.553760290145874 lr:2.0000000000000003e-06 tokens_per_second_per_gpu:228.6423423443406
+ Step 11 | loss:2.7380504608154297 lr:2.2e-06 tokens_per_second_per_gpu:236.86353526582633
+ Step 12 | loss:2.4070146083831787 lr:2.4000000000000003e-06 tokens_per_second_per_gpu:237.81166375260392
+ Step 13 | loss:2.661756992340088 lr:2.6e-06 tokens_per_second_per_gpu:229.8780782426217
+ Step 14 | loss:2.443391799926758 lr:2.8000000000000003e-06 tokens_per_second_per_gpu:234.93454129084736
+ Step 15 | loss:2.693547487258911 lr:3e-06 tokens_per_second_per_gpu:236.07123654615438
+ Step 16 | loss:2.5031023025512695 lr:3.2000000000000003e-06 tokens_per_second_per_gpu:229.87349357484342
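Unlike the earlier runs at a fixed lr of 2e-05, this run uses a linear warmup: the lr grows by 2e-07 per step, reaching 3.2e-06 at step 16. That slope is consistent with a base lr of 2e-05 warmed up over 100 steps, though the warmup length is an assumption; a sketch of the implied schedule:

BASE_LR = 2e-05       # peak lr, matching the earlier fixed-lr logs
WARMUP_STEPS = 100    # assumed; implies an lr increment of 2e-07 per step

def warmup_lr(step: int) -> float:
    # Linear warmup from 0 to BASE_LR over WARMUP_STEPS steps, then constant.
    return BASE_LR * min(step, WARMUP_STEPS) / WARMUP_STEPS

assert abs(warmup_lr(16) - 3.2e-06) < 1e-12   # matches Step 16 in the log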