archit11 committed on
Commit
314ab9f
1 Parent(s): 43db899

archit11/qwen-finetuned-model

Browse files
Files changed (26) hide show
  1. README.md +56 -0
  2. config.json +28 -0
  3. generation_config.json +14 -0
  4. model.safetensors +3 -0
  5. runs/Sep25_21-01-34_4cfab063c3dc/events.out.tfevents.1727298095.4cfab063c3dc.31.0 +3 -0
  6. runs/Sep26_11-43-19_d0d4ee6aec48/events.out.tfevents.1727351001.d0d4ee6aec48.30.0 +3 -0
  7. runs/Sep26_12-00-38_f57f1acbd579/events.out.tfevents.1727352040.f57f1acbd579.30.0 +3 -0
  8. runs/Sep26_12-02-07_f57f1acbd579/events.out.tfevents.1727352129.f57f1acbd579.30.1 +3 -0
  9. runs/Sep26_12-21-06_f57f1acbd579/events.out.tfevents.1727353267.f57f1acbd579.30.2 +3 -0
  10. runs/Sep26_12-26-11_f57f1acbd579/events.out.tfevents.1727353572.f57f1acbd579.30.3 +3 -0
  11. runs/Sep26_15-12-07_e5feb1dc4f89/events.out.tfevents.1727363530.e5feb1dc4f89.30.0 +3 -0
  12. runs/Sep26_15-12-41_e5feb1dc4f89/events.out.tfevents.1727363562.e5feb1dc4f89.30.1 +3 -0
  13. runs/Sep26_15-14-01_e5feb1dc4f89/events.out.tfevents.1727363643.e5feb1dc4f89.211.0 +3 -0
  14. runs/Sep26_15-15-33_e5feb1dc4f89/events.out.tfevents.1727363735.e5feb1dc4f89.323.0 +3 -0
  15. runs/Sep27_10-08-47_9064e4144dc3/events.out.tfevents.1727431730.9064e4144dc3.30.0 +3 -0
  16. runs/Sep27_10-10-07_9064e4144dc3/events.out.tfevents.1727431809.9064e4144dc3.182.0 +3 -0
  17. runs/Sep27_10-19-01_9064e4144dc3/events.out.tfevents.1727432344.9064e4144dc3.299.0 +3 -0
  18. runs/Sep27_10-23-19_64474a00aeae/events.out.tfevents.1727432602.64474a00aeae.30.0 +3 -0
  19. runs/Sep27_10-25-50_64474a00aeae/events.out.tfevents.1727432753.64474a00aeae.210.0 +3 -0
  20. runs/Sep27_10-28-07_64474a00aeae/events.out.tfevents.1727432890.64474a00aeae.348.0 +3 -0
  21. runs/Sep27_10-28-47_64474a00aeae/events.out.tfevents.1727432930.64474a00aeae.466.0 +3 -0
  22. runs/Sep27_10-33-15_b9eeacab482f/events.out.tfevents.1727433198.b9eeacab482f.30.0 +3 -0
  23. runs/Sep27_10-36-14_b9eeacab482f/events.out.tfevents.1727433377.b9eeacab482f.198.0 +3 -0
  24. runs/Sep27_10-38-37_38a02e3fee74/events.out.tfevents.1727433521.38a02e3fee74.31.0 +3 -0
  25. runs/Sep27_10-46-39_38a02e3fee74/events.out.tfevents.1727434000.38a02e3fee74.298.0 +3 -0
  26. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: Qwen/Qwen2.5-0.5B-Instruct
5
+ tags:
6
+ - generated_from_trainer
7
+ model-index:
8
+ - name: capybara_finetuned_results
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # capybara_finetuned_results
16
+
17
+ This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on an unknown dataset.
18
+
19
+ ## Model description
20
+
21
+ More information needed
22
+
23
+ ## Intended uses & limitations
24
+
25
+ More information needed
26
+
27
+ ## Training and evaluation data
28
+
29
+ More information needed
30
+
31
+ ## Training procedure
32
+
33
+ ### Training hyperparameters
34
+
35
+ The following hyperparameters were used during training:
36
+ - learning_rate: 0.0002
37
+ - train_batch_size: 2
38
+ - eval_batch_size: 16
39
+ - seed: 42
40
+ - gradient_accumulation_steps: 2
41
+ - total_train_batch_size: 4
42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
+ - lr_scheduler_type: cosine
44
+ - lr_scheduler_warmup_steps: 5
45
+ - training_steps: 5
46
+
47
+ ### Training results
48
+
49
+
50
+
51
+ ### Framework versions
52
+
53
+ - Transformers 4.44.2
54
+ - Pytorch 2.4.0
55
+ - Datasets 3.0.0
56
+ - Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 21,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 14,
17
+ "num_hidden_layers": 24,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": null,
22
+ "tie_word_embeddings": true,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.44.2",
25
+ "use_cache": true,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 151936
28
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.44.2"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e5e67b3376ec4e55ed6f39d16231ee107e8e4ed18909e517a120113c5746d72
3
+ size 988097824
runs/Sep25_21-01-34_4cfab063c3dc/events.out.tfevents.1727298095.4cfab063c3dc.31.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6d0e7c270ef299505935702fe8bbc35c208524759ddf0accbdc553a5062f56
3
+ size 5036
runs/Sep26_11-43-19_d0d4ee6aec48/events.out.tfevents.1727351001.d0d4ee6aec48.30.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b494de8a8ea7a3701e0e7b35e83e8a1300b1493dde54418205b319b38904fc2e
3
+ size 5589
runs/Sep26_12-00-38_f57f1acbd579/events.out.tfevents.1727352040.f57f1acbd579.30.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ba49118d7ad41f6caee89268ee2e793091725e0a17e366eed9db28fb824481
3
+ size 5381
runs/Sep26_12-02-07_f57f1acbd579/events.out.tfevents.1727352129.f57f1acbd579.30.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fbc08be87b4ad365241ea9c0029f8b5eae5a34eb436afba460f56213118f838
3
+ size 5381
runs/Sep26_12-21-06_f57f1acbd579/events.out.tfevents.1727353267.f57f1acbd579.30.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c604e05c6204aaf3c35b956d16bbbab0af66fb3ec718286e0ba269856fa70039
3
+ size 5381
runs/Sep26_12-26-11_f57f1acbd579/events.out.tfevents.1727353572.f57f1acbd579.30.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af6f7fde64958993a4f57efc82c75a8249ca02f37f4dd6fe660e446e3de02981
3
+ size 5381
runs/Sep26_15-12-07_e5feb1dc4f89/events.out.tfevents.1727363530.e5feb1dc4f89.30.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a5e4baf0c76228074d12a34a139817eedfbfb074073c53140dc0e9d3cdeca45
3
+ size 4184
runs/Sep26_15-12-41_e5feb1dc4f89/events.out.tfevents.1727363562.e5feb1dc4f89.30.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:069b7ad4a2a7fca8b380617c75d4c5494ee7e5bbe98ea14b91077b0a26fbcf11
3
+ size 4184
runs/Sep26_15-14-01_e5feb1dc4f89/events.out.tfevents.1727363643.e5feb1dc4f89.211.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:125588e89b410b1b69c39410c6e0a5e07ed6fdfdc10b676af75b6fb5cd6c4631
3
+ size 4184
runs/Sep26_15-15-33_e5feb1dc4f89/events.out.tfevents.1727363735.e5feb1dc4f89.323.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8be393d695a3f4e0f33779f1c16ab906c8d3063f534308ed22ad5d36d6d75864
3
+ size 5233
runs/Sep27_10-08-47_9064e4144dc3/events.out.tfevents.1727431730.9064e4144dc3.30.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01e55e6f6854fefb808fd79e05e04901af10b7c159de59753c5fdf9c227f06fc
3
+ size 4184
runs/Sep27_10-10-07_9064e4144dc3/events.out.tfevents.1727431809.9064e4144dc3.182.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2e15ad5fea245c1d606db5da7237158d48e0292862a5dd097c31fa31043ac17
3
+ size 5024
runs/Sep27_10-19-01_9064e4144dc3/events.out.tfevents.1727432344.9064e4144dc3.299.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc7fdc591a9ee1a2a998c1c41f3c9cbcacf9337b0eb965c804858fac63655166
3
+ size 4184
runs/Sep27_10-23-19_64474a00aeae/events.out.tfevents.1727432602.64474a00aeae.30.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c999ddaf9e8dcb247de53b16b6674ef6918ef92f4f9df7e69b7e9af9d4ca82
3
+ size 5024
runs/Sep27_10-25-50_64474a00aeae/events.out.tfevents.1727432753.64474a00aeae.210.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d11262d3bc0b3975101d244e268c5a686e01678d75a8d5fc6c81a41892d5d72e
3
+ size 4184
runs/Sep27_10-28-07_64474a00aeae/events.out.tfevents.1727432890.64474a00aeae.348.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:267dc47abd84436e8f4bdae1af7b8f7034a7b4fe88304b0ee22f182782c0e8f2
3
+ size 4184
runs/Sep27_10-28-47_64474a00aeae/events.out.tfevents.1727432930.64474a00aeae.466.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee8d3268375bb29c383316e7192007bf6fbc8f48def0ca164871c73ef337d55d
3
+ size 5025
runs/Sep27_10-33-15_b9eeacab482f/events.out.tfevents.1727433198.b9eeacab482f.30.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c94d70754f1e2d1d9f4a7d4711849155e8e3133463263239528b5b736e2a6f4a
3
+ size 5025
runs/Sep27_10-36-14_b9eeacab482f/events.out.tfevents.1727433377.b9eeacab482f.198.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2120e453a60d239fb8d0c51ad4098bc6390e8a89919c4a31467b87b8a192afdd
3
+ size 4184
runs/Sep27_10-38-37_38a02e3fee74/events.out.tfevents.1727433521.38a02e3fee74.31.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e104310edac54ae4f378549eb9b313c44e4a73b4912a0baa913e2af47c69bce
3
+ size 5024
runs/Sep27_10-46-39_38a02e3fee74/events.out.tfevents.1727434000.38a02e3fee74.298.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:619430453af2e3c6a291243550b040b46251eb9ff9b2f950c028db1ffda5b458
3
+ size 6392
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce49a369baed59bbbef66925572aedf8533a63e1a3ba8e8b5e2475cbf62941aa
3
+ size 5176