Upload best model and additional files

- README.md +7 -4
- adapter_config.json +4 -4
- evaluation_results.json +4 -4
- lora_finetuning.log +48 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
-
-
+base_model: Udith-Sandaruwan/Llama-3.1-8B-Continued-Pretrained-in-ou-2
+library_name: peft
 ---
 
 # Model Card for Model ID
@@ -15,7 +15,7 @@ tags: []
 
 <!-- Provide a longer summary of what this model is. -->
 
-
+
 
 - **Developed by:** [More Information Needed]
 - **Funded by [optional]:** [More Information Needed]
@@ -196,4 +196,7 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 
 ## Model Card Contact
 
-[More Information Needed]
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.14.0
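The front-matter change above records the adapter's base checkpoint and switches the card to the `peft` library (version 0.14.0, per the new Framework versions section). A minimal loading sketch for readers of the card — the base model id is taken from the front matter, while the adapter repo id below is a hypothetical placeholder:

```python
# Minimal sketch: attach this commit's LoRA adapter to the base checkpoint
# named in the README front matter. ADAPTER_ID is a hypothetical placeholder
# for the repo this commit was pushed to.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "Udith-Sandaruwan/Llama-3.1-8B-Continued-Pretrained-in-ou-2"
ADAPTER_ID = "<user>/<adapter-repo>"  # hypothetical: substitute the actual repo id

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(BASE_ID, device_map="auto")
model = PeftModel.from_pretrained(base, ADAPTER_ID)  # reads adapter_config.json
model.eval()
```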
adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
+    "q_proj",
+    "o_proj",
     "k_proj",
     "down_proj",
     "v_proj",
-    "
-    "
-    "gate_proj"
+    "gate_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
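After this edit, `target_modules` covers all seven Llama attention and MLP projections. A hedged reconstruction of the `LoraConfig` that would produce such a file — `target_modules`, `task_type`, and `use_dora` come from the diff itself, `r` is an assumption (not visible in this hunk), and the alpha/dropout values follow the best hyperparameters reported in lora_finetuning.log:

```python
# Hedged reconstruction of the LoraConfig behind adapter_config.json;
# a sketch, not the authors' exact training code.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                             # assumption: rank is not shown in the hunk
    lora_alpha=30,                    # round(30.41...) from the log's best params
    lora_dropout=0.2587925120959257,  # best value reported in the log
    target_modules=[
        "q_proj", "o_proj", "k_proj", "down_proj",
        "v_proj", "gate_proj", "up_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
)
```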
evaluation_results.json
CHANGED
@@ -6,7 +6,7 @@
     "gradient_accumulation_steps": 3,
     "eval_results": {
       "meteor_scores": {
-        "meteor": 0.
+        "meteor": 0.0024752475247524753
       },
       "rouge_scores": {
         "rouge1": 0.0,
@@ -17,7 +17,7 @@
       "bleu_scores": {
         "bleu": 0.0,
         "precisions": [
-          0.
+          0.014184397163120567,
           0.0,
           0.0,
           0.0
@@ -27,7 +27,7 @@
         "translation_length": 141,
         "reference_length": 209
       },
-      "perplexity":
+      "perplexity": 201304048.0
     }
   },
   "2": {
@@ -58,7 +58,7 @@
         "translation_length": 141,
         "reference_length": 209
       },
-      "perplexity":
+      "perplexity": 130403984.0
     }
   },
   "best_param": {
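The filled-in BLEU fields are internally consistent, which is worth a quick sanity check: with a candidate shorter than the reference, BLEU's brevity penalty is exp(1 - reference_length / translation_length), and the stored unigram precision is exactly 2/141 (two matching unigrams out of 141 candidate tokens):

```python
# Sanity check (ours, not part of the commit): the stored BLEU statistics
# are self-consistent for translation_length=141, reference_length=209.
import math

trans_len, ref_len = 141, 209
print(math.exp(1 - ref_len / trans_len))  # 0.6173806530665256 == brevity_penalty
print(trans_len / ref_len)                # 0.6746411483253588 == length_ratio
print(2 / trans_len)                      # 0.014184397163120567 == precisions[0]
```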
lora_finetuning.log
ADDED
@@ -0,0 +1,48 @@
+2025-01-28 12:10:31,187 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 12:20:17,720 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:10:47,232 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 13:10:50,924 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:15:32,374 - Using default tokenizer.
+2025-01-28 13:15:34,813 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 212501856.0}
+2025-01-28 13:15:34,846 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 13:15:34,885 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 13:24:59,597 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:29:13,244 - Using default tokenizer.
+2025-01-28 13:29:16,262 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 133829632.0}
+2025-01-28 13:29:16,268 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 13:29:16,289 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:12:50,446 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:12:53,816 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:15:36,548 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:15:40,086 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:20:21,304 - Using default tokenizer.
+2025-01-28 14:20:23,229 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 184389056.0}
+2025-01-28 14:20:23,256 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:20:23,279 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:20:26,700 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:24:39,399 - Using default tokenizer.
+2025-01-28 14:24:41,483 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 136236672.0}
+2025-01-28 14:24:41,489 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 14:24:41,512 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:30:38,720 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:30:42,775 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:35:24,928 - Using default tokenizer.
+2025-01-28 14:35:26,918 - Evaluation Results: {'meteor_scores': {'meteor': 0.039603960396039604}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.11347517730496454, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 194999696.0}
+2025-01-28 14:35:26,944 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:35:26,974 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:35:30,530 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:39:42,678 - Using default tokenizer.
+2025-01-28 14:39:44,703 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 140277120.0}
+2025-01-28 14:39:44,707 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 14:39:44,728 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:51:34,496 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:51:38,389 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:56:21,131 - Using default tokenizer.
+2025-01-28 14:56:23,242 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 201304048.0}
+2025-01-28 14:56:23,271 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:56:23,363 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:56:26,765 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 15:00:41,363 - Using default tokenizer.
+2025-01-28 15:00:43,468 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 130403984.0}
+2025-01-28 15:00:43,472 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 15:00:43,493 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
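One detail worth flagging in the log: every `Training with params` line uses an integer `gradient_accumulation_steps`, while the `Best hyperparameters` lines report continuous values (e.g. 2.2367... and lora_alpha 30.41...). That is consistent with a continuous hyperparameter search whose samples are rounded before training; the search tool itself is not named in the log, so the sketch below shows only the rounding relationship, which is evident from the values:

```python
# Hedged sketch: map a continuous hyperparameter sample, as reported in the
# "Best hyperparameters" log lines, onto the integer training arguments seen
# in the "Training with params" lines. Only the rounding step is inferred
# from the log; everything else here is illustrative.
def to_train_args(sample: dict) -> dict:
    args = dict(sample)
    args["gradient_accumulation_steps"] = round(args["gradient_accumulation_steps"])
    args["lora_alpha"] = round(args["lora_alpha"])
    return args

best = {
    "gradient_accumulation_steps": 2.2367355022465674,
    "learning_rate": 0.0001665618077288838,
    "lora_alpha": 30.41238686657094,
    "lora_dropout": 0.2587925120959257,
}
print(to_train_args(best))
# gradient_accumulation_steps=2 and lora_alpha=30, matching the log's
# "Training with params: lora_alpha=30, ... gradient_accumulation_steps=2"
```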