archit11/qwen-finetuned-model
Browse files
- README.md +56 -0
- config.json +28 -0
- generation_config.json +14 -0
- model.safetensors +3 -0
- runs/Sep25_21-01-34_4cfab063c3dc/events.out.tfevents.1727298095.4cfab063c3dc.31.0 +3 -0
- runs/Sep26_11-43-19_d0d4ee6aec48/events.out.tfevents.1727351001.d0d4ee6aec48.30.0 +3 -0
- runs/Sep26_12-00-38_f57f1acbd579/events.out.tfevents.1727352040.f57f1acbd579.30.0 +3 -0
- runs/Sep26_12-02-07_f57f1acbd579/events.out.tfevents.1727352129.f57f1acbd579.30.1 +3 -0
- runs/Sep26_12-21-06_f57f1acbd579/events.out.tfevents.1727353267.f57f1acbd579.30.2 +3 -0
- runs/Sep26_12-26-11_f57f1acbd579/events.out.tfevents.1727353572.f57f1acbd579.30.3 +3 -0
- runs/Sep26_15-12-07_e5feb1dc4f89/events.out.tfevents.1727363530.e5feb1dc4f89.30.0 +3 -0
- runs/Sep26_15-12-41_e5feb1dc4f89/events.out.tfevents.1727363562.e5feb1dc4f89.30.1 +3 -0
- runs/Sep26_15-14-01_e5feb1dc4f89/events.out.tfevents.1727363643.e5feb1dc4f89.211.0 +3 -0
- runs/Sep26_15-15-33_e5feb1dc4f89/events.out.tfevents.1727363735.e5feb1dc4f89.323.0 +3 -0
- runs/Sep27_10-08-47_9064e4144dc3/events.out.tfevents.1727431730.9064e4144dc3.30.0 +3 -0
- runs/Sep27_10-10-07_9064e4144dc3/events.out.tfevents.1727431809.9064e4144dc3.182.0 +3 -0
- runs/Sep27_10-19-01_9064e4144dc3/events.out.tfevents.1727432344.9064e4144dc3.299.0 +3 -0
- runs/Sep27_10-23-19_64474a00aeae/events.out.tfevents.1727432602.64474a00aeae.30.0 +3 -0
- runs/Sep27_10-25-50_64474a00aeae/events.out.tfevents.1727432753.64474a00aeae.210.0 +3 -0
- runs/Sep27_10-28-07_64474a00aeae/events.out.tfevents.1727432890.64474a00aeae.348.0 +3 -0
- runs/Sep27_10-28-47_64474a00aeae/events.out.tfevents.1727432930.64474a00aeae.466.0 +3 -0
- runs/Sep27_10-33-15_b9eeacab482f/events.out.tfevents.1727433198.b9eeacab482f.30.0 +3 -0
- runs/Sep27_10-36-14_b9eeacab482f/events.out.tfevents.1727433377.b9eeacab482f.198.0 +3 -0
- runs/Sep27_10-38-37_38a02e3fee74/events.out.tfevents.1727433521.38a02e3fee74.31.0 +3 -0
- runs/Sep27_10-46-39_38a02e3fee74/events.out.tfevents.1727434000.38a02e3fee74.298.0 +3 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
license: apache-2.0
|
4 |
+
base_model: Qwen/Qwen2.5-0.5B-Instruct
|
5 |
+
tags:
|
6 |
+
- generated_from_trainer
|
7 |
+
model-index:
|
8 |
+
- name: capybara_finetuned_results
|
9 |
+
results: []
|
10 |
+
---
|
11 |
+
|
12 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
+
should probably proofread and complete it, then remove this comment. -->
|
14 |
+
|
15 |
+
# capybara_finetuned_results
|
16 |
+
|
17 |
+
This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on an unknown dataset.
|
18 |
+
|
19 |
+
## Model description
|
20 |
+
|
21 |
+
More information needed
|
22 |
+
|
23 |
+
## Intended uses & limitations
|
24 |
+
|
25 |
+
More information needed
|
26 |
+
|
27 |
+
## Training and evaluation data
|
28 |
+
|
29 |
+
More information needed
|
30 |
+
|
31 |
+
## Training procedure
|
32 |
+
|
33 |
+
### Training hyperparameters
|
34 |
+
|
35 |
+
The following hyperparameters were used during training:
|
36 |
+
- learning_rate: 0.0002
|
37 |
+
- train_batch_size: 2
|
38 |
+
- eval_batch_size: 16
|
39 |
+
- seed: 42
|
40 |
+
- gradient_accumulation_steps: 2
|
41 |
+
- total_train_batch_size: 4
|
42 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
43 |
+
- lr_scheduler_type: cosine
|
44 |
+
- lr_scheduler_warmup_steps: 5
|
45 |
+
- training_steps: 5
|
46 |
+
|
47 |
+
### Training results
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
### Framework versions
|
52 |
+
|
53 |
+
- Transformers 4.44.2
|
54 |
+
- PyTorch 2.4.0
|
55 |
+
- Datasets 3.0.0
|
56 |
+
- Tokenizers 0.19.1
|
config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
|
3 |
+
"architectures": [
|
4 |
+
"Qwen2ForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 151643,
|
8 |
+
"eos_token_id": 151645,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 896,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 4864,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"max_window_layers": 21,
|
15 |
+
"model_type": "qwen2",
|
16 |
+
"num_attention_heads": 14,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"num_key_value_heads": 2,
|
19 |
+
"rms_norm_eps": 1e-06,
|
20 |
+
"rope_theta": 1000000.0,
|
21 |
+
"sliding_window": null,
|
22 |
+
"tie_word_embeddings": true,
|
23 |
+
"torch_dtype": "bfloat16",
|
24 |
+
"transformers_version": "4.44.2",
|
25 |
+
"use_cache": true,
|
26 |
+
"use_sliding_window": false,
|
27 |
+
"vocab_size": 151936
|
28 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 151643,
|
3 |
+
"do_sample": true,
|
4 |
+
"eos_token_id": [
|
5 |
+
151645,
|
6 |
+
151643
|
7 |
+
],
|
8 |
+
"pad_token_id": 151643,
|
9 |
+
"repetition_penalty": 1.1,
|
10 |
+
"temperature": 0.7,
|
11 |
+
"top_k": 20,
|
12 |
+
"top_p": 0.8,
|
13 |
+
"transformers_version": "4.44.2"
|
14 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e5e67b3376ec4e55ed6f39d16231ee107e8e4ed18909e517a120113c5746d72
|
3 |
+
size 988097824
|
runs/Sep25_21-01-34_4cfab063c3dc/events.out.tfevents.1727298095.4cfab063c3dc.31.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc6d0e7c270ef299505935702fe8bbc35c208524759ddf0accbdc553a5062f56
|
3 |
+
size 5036
|
runs/Sep26_11-43-19_d0d4ee6aec48/events.out.tfevents.1727351001.d0d4ee6aec48.30.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b494de8a8ea7a3701e0e7b35e83e8a1300b1493dde54418205b319b38904fc2e
|
3 |
+
size 5589
|
runs/Sep26_12-00-38_f57f1acbd579/events.out.tfevents.1727352040.f57f1acbd579.30.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1ba49118d7ad41f6caee89268ee2e793091725e0a17e366eed9db28fb824481
|
3 |
+
size 5381
|
runs/Sep26_12-02-07_f57f1acbd579/events.out.tfevents.1727352129.f57f1acbd579.30.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fbc08be87b4ad365241ea9c0029f8b5eae5a34eb436afba460f56213118f838
|
3 |
+
size 5381
|
runs/Sep26_12-21-06_f57f1acbd579/events.out.tfevents.1727353267.f57f1acbd579.30.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c604e05c6204aaf3c35b956d16bbbab0af66fb3ec718286e0ba269856fa70039
|
3 |
+
size 5381
|
runs/Sep26_12-26-11_f57f1acbd579/events.out.tfevents.1727353572.f57f1acbd579.30.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af6f7fde64958993a4f57efc82c75a8249ca02f37f4dd6fe660e446e3de02981
|
3 |
+
size 5381
|
runs/Sep26_15-12-07_e5feb1dc4f89/events.out.tfevents.1727363530.e5feb1dc4f89.30.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a5e4baf0c76228074d12a34a139817eedfbfb074073c53140dc0e9d3cdeca45
|
3 |
+
size 4184
|
runs/Sep26_15-12-41_e5feb1dc4f89/events.out.tfevents.1727363562.e5feb1dc4f89.30.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:069b7ad4a2a7fca8b380617c75d4c5494ee7e5bbe98ea14b91077b0a26fbcf11
|
3 |
+
size 4184
|
runs/Sep26_15-14-01_e5feb1dc4f89/events.out.tfevents.1727363643.e5feb1dc4f89.211.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:125588e89b410b1b69c39410c6e0a5e07ed6fdfdc10b676af75b6fb5cd6c4631
|
3 |
+
size 4184
|
runs/Sep26_15-15-33_e5feb1dc4f89/events.out.tfevents.1727363735.e5feb1dc4f89.323.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8be393d695a3f4e0f33779f1c16ab906c8d3063f534308ed22ad5d36d6d75864
|
3 |
+
size 5233
|
runs/Sep27_10-08-47_9064e4144dc3/events.out.tfevents.1727431730.9064e4144dc3.30.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01e55e6f6854fefb808fd79e05e04901af10b7c159de59753c5fdf9c227f06fc
|
3 |
+
size 4184
|
runs/Sep27_10-10-07_9064e4144dc3/events.out.tfevents.1727431809.9064e4144dc3.182.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2e15ad5fea245c1d606db5da7237158d48e0292862a5dd097c31fa31043ac17
|
3 |
+
size 5024
|
runs/Sep27_10-19-01_9064e4144dc3/events.out.tfevents.1727432344.9064e4144dc3.299.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc7fdc591a9ee1a2a998c1c41f3c9cbcacf9337b0eb965c804858fac63655166
|
3 |
+
size 4184
|
runs/Sep27_10-23-19_64474a00aeae/events.out.tfevents.1727432602.64474a00aeae.30.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99c999ddaf9e8dcb247de53b16b6674ef6918ef92f4f9df7e69b7e9af9d4ca82
|
3 |
+
size 5024
|
runs/Sep27_10-25-50_64474a00aeae/events.out.tfevents.1727432753.64474a00aeae.210.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d11262d3bc0b3975101d244e268c5a686e01678d75a8d5fc6c81a41892d5d72e
|
3 |
+
size 4184
|
runs/Sep27_10-28-07_64474a00aeae/events.out.tfevents.1727432890.64474a00aeae.348.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:267dc47abd84436e8f4bdae1af7b8f7034a7b4fe88304b0ee22f182782c0e8f2
|
3 |
+
size 4184
|
runs/Sep27_10-28-47_64474a00aeae/events.out.tfevents.1727432930.64474a00aeae.466.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee8d3268375bb29c383316e7192007bf6fbc8f48def0ca164871c73ef337d55d
|
3 |
+
size 5025
|
runs/Sep27_10-33-15_b9eeacab482f/events.out.tfevents.1727433198.b9eeacab482f.30.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c94d70754f1e2d1d9f4a7d4711849155e8e3133463263239528b5b736e2a6f4a
|
3 |
+
size 5025
|
runs/Sep27_10-36-14_b9eeacab482f/events.out.tfevents.1727433377.b9eeacab482f.198.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2120e453a60d239fb8d0c51ad4098bc6390e8a89919c4a31467b87b8a192afdd
|
3 |
+
size 4184
|
runs/Sep27_10-38-37_38a02e3fee74/events.out.tfevents.1727433521.38a02e3fee74.31.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e104310edac54ae4f378549eb9b313c44e4a73b4912a0baa913e2af47c69bce
|
3 |
+
size 5024
|
runs/Sep27_10-46-39_38a02e3fee74/events.out.tfevents.1727434000.38a02e3fee74.298.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:619430453af2e3c6a291243550b040b46251eb9ff9b2f950c028db1ffda5b458
|
3 |
+
size 6392
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce49a369baed59bbbef66925572aedf8533a63e1a3ba8e8b5e2475cbf62941aa
|
3 |
+
size 5176
|