Udith-Sandaruwan committed
Commit 5073e33 · verified · 1 Parent(s): 4e97b3b

Upload best model and additional files
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-library_name: transformers
-tags: []
+base_model: Udith-Sandaruwan/Llama-3.1-8B-Continued-Pretrained-in-ou-2
+library_name: peft
 ---
 
 # Model Card for Model ID
@@ -15,7 +15,7 @@ tags: []
 
 <!-- Provide a longer summary of what this model is. -->
 
-This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
+
 
 - **Developed by:** [More Information Needed]
 - **Funded by [optional]:** [More Information Needed]
@@ -196,4 +196,7 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 
 ## Model Card Contact
 
-[More Information Needed]
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.14.0
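
The updated front matter re-declares the repo as a PEFT adapter: it pins the base checkpoint and swaps `library_name` from `transformers` to `peft`, with PEFT 0.14.0 listed under the new Framework versions section. A minimal loading sketch under those assumptions follows; the adapter repo id is a placeholder, since the diff does not name this repository:

# Minimal sketch: attach this repo's LoRA adapter to the declared base model.
# ADAPTER_REPO is a placeholder -- substitute the actual Hub id of this repo.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_REPO = "Udith-Sandaruwan/Llama-3.1-8B-Continued-Pretrained-in-ou-2"  # base_model from the card
ADAPTER_REPO = "<this-adapter-repo>"  # placeholder, not stated in the diff

tokenizer = AutoTokenizer.from_pretrained(BASE_REPO)
base = AutoModelForCausalLM.from_pretrained(BASE_REPO, torch_dtype="auto")
model = PeftModel.from_pretrained(base, ADAPTER_REPO)  # loads adapter_config.json + adapter weights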
adapter_config.json CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
+    "q_proj",
+    "o_proj",
     "k_proj",
     "down_proj",
     "v_proj",
-    "q_proj",
-    "o_proj",
-    "gate_proj"
+    "gate_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
evaluation_results.json CHANGED
@@ -6,7 +6,7 @@
     "gradient_accumulation_steps": 3,
     "eval_results": {
       "meteor_scores": {
-        "meteor": 0.039603960396039604
+        "meteor": 0.0024752475247524753
       },
       "rouge_scores": {
         "rouge1": 0.0,
@@ -17,7 +17,7 @@
       "bleu_scores": {
         "bleu": 0.0,
         "precisions": [
-          0.11347517730496454,
+          0.014184397163120567,
           0.0,
           0.0,
           0.0
@@ -27,7 +27,7 @@
         "translation_length": 141,
         "reference_length": 209
       },
-      "perplexity": 194999696.0
+      "perplexity": 201304048.0
     }
   },
   "2": {
@@ -58,7 +58,7 @@
         "translation_length": 141,
         "reference_length": 209
       },
-      "perplexity": 140277120.0
+      "perplexity": 130403984.0
     }
   },
   "best_param": {
lora_finetuning.log ADDED
@@ -0,0 +1,48 @@
+2025-01-28 12:10:31,187 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 12:20:17,720 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:10:47,232 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 13:10:50,924 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:15:32,374 - Using default tokenizer.
+2025-01-28 13:15:34,813 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 212501856.0}
+2025-01-28 13:15:34,846 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 13:15:34,885 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 13:24:59,597 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:29:13,244 - Using default tokenizer.
+2025-01-28 13:29:16,262 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 133829632.0}
+2025-01-28 13:29:16,268 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 13:29:16,289 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:12:50,446 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:12:53,816 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:15:36,548 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:15:40,086 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:20:21,304 - Using default tokenizer.
+2025-01-28 14:20:23,229 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 184389056.0}
+2025-01-28 14:20:23,256 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:20:23,279 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:20:26,700 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:24:39,399 - Using default tokenizer.
+2025-01-28 14:24:41,483 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 136236672.0}
+2025-01-28 14:24:41,489 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 14:24:41,512 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:30:38,720 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:30:42,775 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:35:24,928 - Using default tokenizer.
+2025-01-28 14:35:26,918 - Evaluation Results: {'meteor_scores': {'meteor': 0.039603960396039604}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.11347517730496454, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 194999696.0}
+2025-01-28 14:35:26,944 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:35:26,974 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:35:30,530 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:39:42,678 - Using default tokenizer.
+2025-01-28 14:39:44,703 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 140277120.0}
+2025-01-28 14:39:44,707 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 14:39:44,728 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:51:34,496 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:51:38,389 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:56:21,131 - Using default tokenizer.
+2025-01-28 14:56:23,242 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 201304048.0}
+2025-01-28 14:56:23,271 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:56:23,363 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:56:26,765 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 15:00:41,363 - Using default tokenizer.
+2025-01-28 15:00:43,468 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 130403984.0}
+2025-01-28 15:00:43,472 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 15:00:43,493 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
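
Note the type mismatch in the logged "Best hyperparameters": gradient_accumulation_steps ≈ 2.24 and lora_alpha ≈ 30.41 are fractional, which is characteristic of a continuous search (e.g., Bayesian optimization), whereas the matching "Training with params" lines show the integer values actually used (2 and 30). A sketch of that rounding step, under that assumption:

# Sketch: round the search's continuous suggestions to the integer knobs
# the trainer accepts (values copied from the log above).
best = {
    "gradient_accumulation_steps": 2.2367355022465674,
    "learning_rate": 0.0001665618077288838,
    "lora_alpha": 30.41238686657094,
    "lora_dropout": 0.2587925120959257,
}

train_params = {
    "gradient_accumulation_steps": round(best["gradient_accumulation_steps"]),  # -> 2
    "learning_rate": best["learning_rate"],
    "lora_alpha": round(best["lora_alpha"]),  # -> 30
    "lora_dropout": best["lora_dropout"],
}
print(train_params)  # matches the "Training with params: lora_alpha=30, ..." lines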