Upload folder using huggingface_hub
- README.md +59 -80
- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- all_results.json +10 -10
- eval_results.json +5 -5
- train_results.json +6 -6
- trainer_log.jsonl +0 -0
- trainer_state.json +0 -0
- training_args.bin +1 -1
- training_eval_loss.png +0 -0
- training_loss.png +0 -0
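For context on the commit title: an upload like this is typically produced by pushing a local training output directory with `huggingface_hub`'s `upload_folder`. A minimal sketch, assuming a hypothetical repo id and local folder path (neither appears on this page):

```python
# Hedged sketch of how a commit titled "Upload folder using huggingface_hub"
# is usually created. repo_id and folder_path below are placeholders.
from huggingface_hub import HfApi

api = HfApi()  # assumes you are already authenticated (huggingface-cli login)
api.upload_folder(
    folder_path="./llama3.2-3b-lora-output",    # hypothetical local output dir
    repo_id="your-username/your-adapter-repo",  # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```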
README.md
CHANGED
@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on the akash_unifo_757 dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.0018
 
 ## Model description
 
@@ -47,90 +47,69 @@ The following hyperparameters were used during training:
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs:
+- num_epochs: 25.0
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
[… 57 earlier removed rows; their values are truncated in the rendered diff …]
-| 0.0045 | 3.7153 | 1450 | 0.0070 |
-| 0.0043 | 3.7794 | 1475 | 0.0068 |
-| 0.0036 | 3.8434 | 1500 | 0.0068 |
-| 0.0056 | 3.9075 | 1525 | 0.0067 |
-| 0.0049 | 3.9715 | 1550 | 0.0066 |
-| 0.002 | 4.0359 | 1575 | 0.0068 |
-| 0.0038 | 4.0999 | 1600 | 0.0070 |
-| 0.0017 | 4.1639 | 1625 | 0.0074 |
-| 0.0029 | 4.2280 | 1650 | 0.0070 |
-| 0.0031 | 4.2920 | 1675 | 0.0072 |
-| 0.0022 | 4.3561 | 1700 | 0.0071 |
-| 0.0024 | 4.4201 | 1725 | 0.0072 |
-| 0.0025 | 4.4841 | 1750 | 0.0071 |
-| 0.0026 | 4.5482 | 1775 | 0.0072 |
-| 0.0023 | 4.6122 | 1800 | 0.0072 |
-| 0.0023 | 4.6763 | 1825 | 0.0072 |
-| 0.0019 | 4.7403 | 1850 | 0.0072 |
-| 0.0025 | 4.8044 | 1875 | 0.0072 |
-| 0.0023 | 4.8684 | 1900 | 0.0072 |
-| 0.0021 | 4.9324 | 1925 | 0.0072 |
-| 0.0028 | 4.9965 | 1950 | 0.0072 |
+| 0.0124 | 0.0808 | 25 | 0.0126 |
+| 0.0042 | 0.1616 | 50 | 0.0068 |
+| 0.0074 | 0.2424 | 75 | 0.0058 |
+| 0.0052 | 0.3232 | 100 | 0.0052 |
+| 0.0041 | 0.4040 | 125 | 0.0048 |
+| 0.0079 | 0.4848 | 150 | 0.0044 |
+| 0.0097 | 0.5657 | 175 | 0.0041 |
+| 0.0049 | 0.6465 | 200 | 0.0038 |
+| 0.0053 | 0.7273 | 225 | 0.0038 |
+| 0.0041 | 0.8081 | 250 | 0.0036 |
+| 0.0025 | 0.8889 | 275 | 0.0034 |
+| 0.0038 | 0.9697 | 300 | 0.0035 |
+| 0.0025 | 1.0505 | 325 | 0.0034 |
+| 0.0025 | 1.1313 | 350 | 0.0032 |
+| 0.0028 | 1.2121 | 375 | 0.0030 |
+| 0.0065 | 1.2929 | 400 | 0.0031 |
+| 0.0021 | 1.3737 | 425 | 0.0030 |
+| 0.0026 | 1.4545 | 450 | 0.0030 |
+| 0.0035 | 1.5354 | 475 | 0.0028 |
+| 0.0029 | 1.6162 | 500 | 0.0028 |
+| 0.0046 | 1.6970 | 525 | 0.0027 |
+| 0.004 | 1.7778 | 550 | 0.0028 |
+| 0.0038 | 1.8586 | 575 | 0.0026 |
+| 0.0064 | 1.9394 | 600 | 0.0026 |
+| 0.0044 | 2.0202 | 625 | 0.0026 |
+| 0.0022 | 2.1010 | 650 | 0.0025 |
+| 0.0019 | 2.1818 | 675 | 0.0024 |
+| 0.0057 | 2.2626 | 700 | 0.0024 |
+| 0.0025 | 2.3434 | 725 | 0.0024 |
+| 0.0031 | 2.4242 | 750 | 0.0024 |
+| 0.0027 | 2.5051 | 775 | 0.0023 |
+| 0.0024 | 2.5859 | 800 | 0.0022 |
+| 0.0027 | 2.6667 | 825 | 0.0022 |
+| 0.0023 | 2.7475 | 850 | 0.0023 |
+| 0.0019 | 2.8283 | 875 | 0.0022 |
+| 0.002 | 2.9091 | 900 | 0.0021 |
+| 0.0025 | 2.9899 | 925 | 0.0021 |
+| 0.0014 | 3.0707 | 950 | 0.0022 |
+| 0.0012 | 3.1515 | 975 | 0.0021 |
+| 0.0018 | 3.2323 | 1000 | 0.0020 |
+| 0.0022 | 3.3131 | 1025 | 0.0019 |
+| 0.0018 | 3.3939 | 1050 | 0.0018 |
+| 0.002 | 3.4747 | 1075 | 0.0019 |
+| 0.0019 | 3.5556 | 1100 | 0.0020 |
+| 0.0014 | 3.6364 | 1125 | 0.0019 |
+| 0.0026 | 3.7172 | 1150 | 0.0018 |
+| 0.0021 | 3.7980 | 1175 | 0.0019 |
+| 0.0022 | 3.8788 | 1200 | 0.0019 |
+| 0.0028 | 3.9596 | 1225 | 0.0019 |
+| 0.0009 | 4.0404 | 1250 | 0.0020 |
+| 0.0015 | 4.1212 | 1275 | 0.0018 |
+| 0.0011 | 4.2020 | 1300 | 0.0018 |
+| 0.002 | 4.2828 | 1325 | 0.0019 |
+| 0.0014 | 4.3636 | 1350 | 0.0020 |
+| 0.0011 | 4.4444 | 1375 | 0.0019 |
+| 0.0021 | 4.5253 | 1400 | 0.0018 |
+| 0.0025 | 4.6061 | 1425 | 0.0019 |
 
 
 ### Framework versions
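The updated README describes a LoRA adapter for meta-llama/Llama-3.2-3B-Instruct that reaches an evaluation loss of 0.0018. As a rough usage sketch (the adapter repo id below is a placeholder, not something stated in this commit), such an adapter is normally loaded on top of the base model with PEFT:

```python
# Hedged sketch: attach the LoRA adapter described in the README to its base model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-3.2-3B-Instruct"
adapter_id = "your-username/your-adapter-repo"  # hypothetical; substitute the real repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_id)  # loads adapter_model.safetensors
model.eval()
```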
adapter_config.json
CHANGED
@@ -22,11 +22,11 @@
   "target_modules": [
     "up_proj",
     "o_proj",
+    "v_proj",
     "down_proj",
-    "
+    "q_proj",
     "gate_proj",
-    "
-    "q_proj"
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
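The new target_modules list covers all seven attention and MLP projection matrices of the Llama architecture (q/k/v/o plus gate/up/down). A hedged sketch of the corresponding peft.LoraConfig; rank, alpha, and dropout are illustrative values, since they sit elsewhere in adapter_config.json and are not shown in this hunk:

```python
# Sketch of a LoRA configuration matching the target_modules in the new adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["up_proj", "o_proj", "v_proj", "down_proj",
                    "q_proj", "gate_proj", "k_proj"],
    r=16,               # assumed; not visible in this diff hunk
    lora_alpha=32,      # assumed; not visible in this diff hunk
    lora_dropout=0.05,  # assumed; not visible in this diff hunk
    use_dora=False,
)
```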
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e5633f2cc6614a68045fd925c497ececfd397080991e2d60a2de156f50fc73d3
 size 1556140392
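Only the Git LFS pointer changes here: `oid sha256:` is the SHA-256 of the uploaded file and `size` is its length in bytes. A minimal sketch for checking a downloaded copy against the new pointer (the local path is an assumption):

```python
# Verify a local adapter_model.safetensors against the LFS pointer above.
import hashlib
from pathlib import Path

path = Path("adapter_model.safetensors")  # hypothetical local copy

h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == "e5633f2cc6614a68045fd925c497ececfd397080991e2d60a2de156f50fc73d3"
assert path.stat().st_size == 1556140392
```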
all_results.json
CHANGED
@@ -1,12 +1,12 @@
 {
-    "epoch": 4.
-    "eval_loss": 0.
-    "eval_runtime": 15.
-    "eval_samples_per_second": 6.
-    "eval_steps_per_second": 1.
-    "total_flos":
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second":
-    "train_steps_per_second": 0.
+    "epoch": 4.606060606060606,
+    "eval_loss": 0.0018192199058830738,
+    "eval_runtime": 15.5822,
+    "eval_samples_per_second": 6.418,
+    "eval_steps_per_second": 1.604,
+    "total_flos": 2.884558791231996e+18,
+    "train_loss": 0.003744818231442192,
+    "train_runtime": 22029.3077,
+    "train_samples_per_second": 11.235,
+    "train_steps_per_second": 0.351
 }
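The evaluation throughput fields are internally consistent: runtime times samples-per-second gives roughly 100 evaluation examples, and dividing by steps-per-second suggests an evaluation batch size of about 4. A quick back-of-the-envelope check, using only the values above; the single-device assumption is mine:

```python
# Rough sanity check on the reported evaluation throughput (values from all_results.json).
eval_runtime = 15.5822              # seconds
eval_samples_per_second = 6.418
eval_steps_per_second = 1.604

n_eval_samples = eval_runtime * eval_samples_per_second      # ~100 examples
n_eval_steps = eval_runtime * eval_steps_per_second          # ~25 batches
batch_size = eval_samples_per_second / eval_steps_per_second # ~4 per device (assumed single device)

print(round(n_eval_samples), round(n_eval_steps), round(batch_size))
```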
eval_results.json
CHANGED
@@ -1,7 +1,7 @@
 {
-    "epoch": 4.
-    "eval_loss": 0.
-    "eval_runtime": 15.
-    "eval_samples_per_second": 6.
-    "eval_steps_per_second": 1.
+    "epoch": 4.606060606060606,
+    "eval_loss": 0.0018192199058830738,
+    "eval_runtime": 15.5822,
+    "eval_samples_per_second": 6.418,
+    "eval_steps_per_second": 1.604
 }
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 4.
-    "total_flos":
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second":
-    "train_steps_per_second": 0.
+    "epoch": 4.606060606060606,
+    "total_flos": 2.884558791231996e+18,
+    "train_loss": 0.003744818231442192,
+    "train_runtime": 22029.3077,
+    "train_samples_per_second": 11.235,
+    "train_steps_per_second": 0.351
 }
trainer_log.jsonl
CHANGED
The diff for this file is too large to render; see the raw diff.
trainer_state.json
CHANGED
The diff for this file is too large to render; see the raw diff.
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2232ef3e623726c9a6cf66ed10d6628c24be8cb32ec8e090bf92c4666bed2c58
 size 5432
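training_args.bin is the pickled TrainingArguments object the Trainer writes next to its outputs; as with the safetensors file, only its LFS pointer changes here. A minimal inspection sketch, assuming a locally downloaded copy that you trust (it is a full pickle, so recent PyTorch needs weights_only=False):

```python
# Inspect the saved TrainingArguments; only do this for files from a trusted source.
import torch

args = torch.load("training_args.bin", weights_only=False)  # path is a hypothetical local copy
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```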
training_eval_loss.png
CHANGED
training_loss.png
CHANGED