Upload best model and additional files
- adapter_config.json +3 -3
- evaluation_results.json +70 -0
- lora_finetuning.log +66 -0
adapter_config.json
CHANGED
@@ -23,12 +23,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "o_proj",
     "k_proj",
-    "down_proj",
     "v_proj",
     "gate_proj",
+    "q_proj",
+    "o_proj",
+    "down_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",
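Note: this change only reorders entries in target_modules; the set of adapted projection matrices is unchanged. For reference, a config like this is normally generated by peft's LoraConfig. A minimal sketch follows; the rank r=16 is an assumption (it falls outside the hunk shown), and lora_alpha/lora_dropout are the rounded best values from evaluation_results.json below:

from peft import LoraConfig

# Minimal sketch of the config object that serializes to adapter_config.json.
# r=16 is an assumption -- the rank is not visible in the hunk above.
config = LoraConfig(
    r=16,                             # assumed; not shown in this diff
    lora_alpha=30,                    # rounded best value from the search below
    lora_dropout=0.2587925120959257,
    target_modules=[
        "k_proj", "v_proj", "gate_proj",
        "q_proj", "o_proj", "down_proj", "up_proj",
    ],
    task_type="CAUSAL_LM",
)
config.save_pretrained("adapter_out")  # writes adapter_config.json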
evaluation_results.json
ADDED
@@ -0,0 +1,70 @@
+{
+    "1": {
+        "lora_alpha": 23,
+        "lora_dropout": 0.25385249450462455,
+        "learning_rate": 0.0002521627789110728,
+        "gradient_accumulation_steps": 3,
+        "eval_results": {
+            "meteor_scores": {
+                "meteor": 0.0024752475247524753
+            },
+            "rouge_scores": {
+                "rouge1": 0.0,
+                "rouge2": 0.0,
+                "rougeL": 0.0,
+                "rougeLsum": 0.0
+            },
+            "bleu_scores": {
+                "bleu": 0.0,
+                "precisions": [
+                    0.014184397163120567,
+                    0.0,
+                    0.0,
+                    0.0
+                ],
+                "brevity_penalty": 0.6173806530665256,
+                "length_ratio": 0.6746411483253588,
+                "translation_length": 141,
+                "reference_length": 209
+            },
+            "perplexity": 192550048.0
+        }
+    },
+    "2": {
+        "lora_alpha": 30,
+        "lora_dropout": 0.2587925120959257,
+        "learning_rate": 0.0001665618077288838,
+        "gradient_accumulation_steps": 2,
+        "eval_results": {
+            "meteor_scores": {
+                "meteor": 0.017326732673267325
+            },
+            "rouge_scores": {
+                "rouge1": 0.0,
+                "rouge2": 0.0,
+                "rougeL": 0.0,
+                "rougeLsum": 0.0
+            },
+            "bleu_scores": {
+                "bleu": 0.0,
+                "precisions": [
+                    0.05673758865248227,
+                    0.0,
+                    0.0,
+                    0.0
+                ],
+                "brevity_penalty": 0.6173806530665256,
+                "length_ratio": 0.6746411483253588,
+                "translation_length": 141,
+                "reference_length": 209
+            },
+            "perplexity": 139593840.0
+        }
+    },
+    "best_param": {
+        "lora_alpha": 30.41238686657094,
+        "lora_dropout": 0.2587925120959257,
+        "learning_rate": 0.0001665618077288838,
+        "gradient_accumulation_steps": 2
+    }
+}
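The metric keys above match the output of Hugging Face's evaluate package, so a plausible sketch of how these numbers are produced is shown below; predictions and references are placeholders, and treating perplexity as the exponential of the eval loss is an assumption:

import math
import evaluate

# Placeholders standing in for the real generations and gold texts.
predictions = ["model output ..."]
references = ["reference text ..."]

eval_results = {
    "meteor_scores": evaluate.load("meteor").compute(
        predictions=predictions, references=references),
    "rouge_scores": evaluate.load("rouge").compute(
        predictions=predictions, references=references),
    "bleu_scores": evaluate.load("bleu").compute(
        predictions=predictions, references=references),
    # Assumption: perplexity = exp(eval loss); 18.75 is a placeholder
    # (exp(18.75) is on the order of the ~1.4e8 values recorded above).
    "perplexity": math.exp(18.75),
}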
lora_finetuning.log
ADDED
@@ -0,0 +1,66 @@
+2025-01-28 12:10:31,187 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 12:20:17,720 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:10:47,232 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 13:10:50,924 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:15:32,374 - Using default tokenizer.
+2025-01-28 13:15:34,813 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 212501856.0}
+2025-01-28 13:15:34,846 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 13:15:34,885 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 13:24:59,597 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 13:29:13,244 - Using default tokenizer.
+2025-01-28 13:29:16,262 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 133829632.0}
+2025-01-28 13:29:16,268 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 13:29:16,289 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:12:50,446 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:12:53,816 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:15:36,548 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:15:40,086 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:20:21,304 - Using default tokenizer.
+2025-01-28 14:20:23,229 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 184389056.0}
+2025-01-28 14:20:23,256 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:20:23,279 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:20:26,700 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:24:39,399 - Using default tokenizer.
+2025-01-28 14:24:41,483 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 136236672.0}
+2025-01-28 14:24:41,489 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 14:24:41,512 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:30:38,720 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:30:42,775 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:35:24,928 - Using default tokenizer.
+2025-01-28 14:35:26,918 - Evaluation Results: {'meteor_scores': {'meteor': 0.039603960396039604}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.11347517730496454, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 194999696.0}
+2025-01-28 14:35:26,944 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:35:26,974 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:35:30,530 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:39:42,678 - Using default tokenizer.
+2025-01-28 14:39:44,703 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 140277120.0}
+2025-01-28 14:39:44,707 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 14:39:44,728 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:51:34,496 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 14:51:38,389 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 14:56:21,131 - Using default tokenizer.
+2025-01-28 14:56:23,242 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 201304048.0}
+2025-01-28 14:56:23,271 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 14:56:23,363 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 14:56:26,765 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 15:00:41,363 - Using default tokenizer.
+2025-01-28 15:00:43,468 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 130403984.0}
+2025-01-28 15:00:43,472 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 15:00:43,493 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 15:02:25,610 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 15:02:29,454 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 15:07:10,155 - Using default tokenizer.
+2025-01-28 15:07:12,488 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 211998656.0}
+2025-01-28 15:07:12,571 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 15:07:12,672 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 15:07:16,934 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 15:12:17,773 - Training with params: lora_alpha=23, lora_dropout=0.25385249450462455, learning_rate=0.0002521627789110728, gradient_accumulation_steps=3
+2025-01-28 15:12:21,432 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 15:17:01,601 - Using default tokenizer.
+2025-01-28 15:17:03,730 - Evaluation Results: {'meteor_scores': {'meteor': 0.0024752475247524753}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.014184397163120567, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 192550048.0}
+2025-01-28 15:17:03,763 - Training with params: lora_alpha=30, lora_dropout=0.2587925120959257, learning_rate=0.0001665618077288838, gradient_accumulation_steps=2
+2025-01-28 15:17:03,790 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
+2025-01-28 15:17:07,194 - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+2025-01-28 15:21:19,175 - Using default tokenizer.
+2025-01-28 15:21:21,237 - Evaluation Results: {'meteor_scores': {'meteor': 0.017326732673267325}, 'rouge_scores': {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}, 'bleu_scores': {'bleu': 0.0, 'precisions': [0.05673758865248227, 0.0, 0.0, 0.0], 'brevity_penalty': 0.6173806530665256, 'length_ratio': 0.6746411483253588, 'translation_length': 141, 'reference_length': 209}, 'perplexity': 139593840.0}
+2025-01-28 15:21:21,243 - Best hyperparameters: {'gradient_accumulation_steps': 2.2367355022465674, 'learning_rate': 0.0001665618077288838, 'lora_alpha': 30.41238686657094, 'lora_dropout': 0.2587925120959257}
+2025-01-28 15:21:21,264 - Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!
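The fractional values in the "Best hyperparameters" lines (gradient_accumulation_steps=2.2367..., lora_alpha=30.412...) suggest the search ran over a continuous parameter space, with integer-valued parameters rounded before each trial (hence lora_alpha=30 and gradient_accumulation_steps=2 in the "Training with params" lines). A minimal sketch of such a loop, assuming the bayes_opt package; run_trial and the pbounds ranges are hypothetical:

from bayes_opt import BayesianOptimization

def run_trial(**params):
    # Hypothetical stand-in for train + evaluate; returns a score to
    # maximize (e.g. negative log-perplexity). Replace with real training.
    return 0.0

def objective(lora_alpha, lora_dropout, learning_rate,
              gradient_accumulation_steps):
    # Integer-valued params are rounded before training, so the optimizer's
    # raw (fractional) suggestions differ from the values actually trained.
    return run_trial(
        lora_alpha=round(lora_alpha),
        lora_dropout=lora_dropout,
        learning_rate=learning_rate,
        gradient_accumulation_steps=round(gradient_accumulation_steps),
    )

optimizer = BayesianOptimization(
    f=objective,
    pbounds={  # assumed ranges, chosen to bracket the values in the log
        "lora_alpha": (8, 64),
        "lora_dropout": (0.0, 0.3),
        "learning_rate": (1e-5, 5e-4),
        "gradient_accumulation_steps": (1, 4),
    },
)
optimizer.maximize(init_points=2, n_iter=4)
print("Best hyperparameters:", optimizer.max["params"])  # raw, unrounded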