Commit 0c11ace by NanQiangHF
Parent(s): 4856056

llama3.1_8b_bwgenerator

Files changed:
- README.md (+10, -18)
- adapter_config.json (+3, -3)
- adapter_model.safetensors (+2, -2)
- training_args.bin (+1, -1)
README.md
CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.1528
 
 ## Model description
 
@@ -45,28 +45,20 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 256
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs:
+- num_epochs: 1
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.1281 | 1.0922 | 180 | 0.1254 |
-| 0.1246 | 1.2135 | 200 | 0.1229 |
-| 0.1229 | 1.3349 | 220 | 0.1200 |
-| 0.1202 | 1.4562 | 240 | 0.1179 |
-| 0.1185 | 1.5776 | 260 | 0.1164 |
-| 0.1166 | 1.6989 | 280 | 0.1154 |
-| 0.1165 | 1.8203 | 300 | 0.1143 |
-| 0.1155 | 1.9416 | 320 | 0.1141 |
+| 1.2317 | 0.1246 | 20 | 0.3990 |
+| 0.3324 | 0.2492 | 40 | 0.2884 |
+| 0.2665 | 0.3738 | 60 | 0.2478 |
+| 0.2326 | 0.4983 | 80 | 0.2174 |
+| 0.2072 | 0.6229 | 100 | 0.1941 |
+| 0.1818 | 0.7475 | 120 | 0.1687 |
+| 0.1631 | 0.8721 | 140 | 0.1555 |
+| 0.1543 | 0.9967 | 160 | 0.1528 |
 
 
 ### Framework versions
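For anyone who wants to try the checkpoint this commit updates, a minimal loading sketch with transformers and peft follows. The base model id comes from the README; the adapter repo id is only inferred from the commit author and message, so treat it as an assumption.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"  # base model named in the README
adapter_id = "NanQiangHF/llama3.1_8b_bwgenerator"  # hypothetical repo id, inferred from the commit

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_id)  # overlays the LoRA adapter weights
model.eval()
```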
adapter_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path":
+  "base_model_name_or_path": null,
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -22,8 +22,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
-    "
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
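After this commit the adapter targets the attention query and value projections for causal language modeling. A minimal sketch of a peft.LoraConfig matching the fields visible in these hunks is shown below; rank, alpha, and dropout do not appear in the diff, so those values are placeholders, not the repo's actual settings.

```python
from peft import LoraConfig

config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["v_proj", "q_proj"],  # the modules this commit sets
    bias="none",
    fan_in_fan_out=False,
    use_dora=False,
    r=8,               # placeholder: rank is not visible in the hunks
    lora_alpha=16,     # placeholder: alpha is not visible in the hunks
    lora_dropout=0.0,  # placeholder: dropout is not visible in the hunks
)
```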
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:12ac22bba7490e37ed9784769714d9b6253b1e5b38f9536df4e96909f089dbab
+size 6834904
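The two binary files in this commit are stored as Git LFS pointers: the oid field is simply the SHA-256 of the real file contents and size is its byte length. A small sketch for checking a locally downloaded copy against the new pointer, assuming the file sits in the working directory:

```python
import hashlib
import os

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256, matching the LFS pointer's oid field."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

path = "adapter_model.safetensors"  # assumes the file was downloaded locally
assert os.path.getsize(path) == 6834904
assert sha256_of(path) == "12ac22bba7490e37ed9784769714d9b6253b1e5b38f9536df4e96909f089dbab"
```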
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b17beb214a9143f2befed0a63928bdeddafa1fe89cacf297ce36eb0396eee0a8
 size 5560
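A training_args.bin written by the transformers Trainer is conventionally a pickled TrainingArguments object, so one way to recover the exact settings behind the hyperparameters listed in the README is to unpickle it, as in the sketch below. This executes pickle code, so it is only appropriate for files from a source you trust.

```python
import torch

# weights_only=False is required on recent PyTorch versions and implies
# trusting the file, since unpickling can execute arbitrary code.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```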