zera09 committed
Commit 7d70d62
1 parent: 138ddb8

End of training
README.md CHANGED
```diff
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.8493
+- Loss: 1.6392
 
 ## Model description
 
@@ -52,17 +52,17 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.8703 | 1.0 | 394 | 1.8750 |
-| 0.7032 | 2.0 | 788 | 1.8472 |
-| 0.6907 | 3.0 | 1182 | 1.8444 |
-| 0.7402 | 4.0 | 1576 | 1.8483 |
-| 0.6788 | 5.0 | 1970 | 1.8493 |
+| 0.561 | 1.0 | 394 | 1.5800 |
+| 0.5974 | 2.0 | 788 | 1.5667 |
+| 0.5388 | 3.0 | 1182 | 1.5681 |
+| 0.496 | 4.0 | 1576 | 1.5996 |
+| 0.4139 | 5.0 | 1970 | 1.6392 |
 
 
 ### Framework versions
 
-- PEFT 0.11.1
-- Transformers 4.44.0
-- Pytorch 2.3.1
-- Datasets 2.20.0
+- PEFT 0.12.1.dev0
+- Transformers 4.45.0.dev0
+- Pytorch 2.4.1+cu121
+- Datasets 3.0.0
 - Tokenizers 0.19.1
```
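For context, a minimal sketch of how this retrained adapter could be loaded on top of the base model with the PEFT and Transformers versions listed above. The adapter repo id below is a placeholder, since the diff does not show the repository name:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Meta-Llama-3-8B-Instruct"
adapter_id = "zera09/<adapter-repo>"  # hypothetical: replace with the actual repo id

# Load the base model, then attach the LoRA adapter on top of it.
tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_id)

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```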
adapter_config.json CHANGED
```diff
@@ -10,17 +10,18 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 32,
+  "lora_alpha": 16,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 16,
+  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "lm_head"
+    "gate_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
```
adapter_model.safetensors CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8dbd0f2aff0fefbdb352d1c94a22ee1aa6bcb08ab4181e3b6b5385d5e2bd852
-size 1054925248
+oid sha256:653c17a00f1b1b83258a5941869da3e55c043965851913b1b193ffe68a4f9b33
+size 2128659168
```
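The LFS pointer shows the adapter checkpoint roughly doubling in size (about 1.05 GB to 2.13 GB). To see which tensors account for the difference, one option is to list the checkpoint contents with the `safetensors` API (the local path is an assumption, pointing at the downloaded file):

```python
from safetensors import safe_open

# List every tensor in the adapter checkpoint with its shape and dtype.
with safe_open("adapter_model.safetensors", framework="pt") as f:
    for name in f.keys():
        t = f.get_tensor(name)
        print(f"{name}: {tuple(t.shape)} {t.dtype}")
```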
training_args.bin CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cc34c9517b435487c2b71647d20346b334d2cf646c60452cb01e4ee0b381927
+oid sha256:b3f9c4081453a7ff67aa451b1b5484e84eb9bda7980f390466844f515273b467
 size 5432
```