End of training
Browse files
- README.md +3 -2
- all_results.json +3 -3
- train_results.json +3 -3
- trainer_state.json +3 -3
README.md
CHANGED
@@ -2,8 +2,9 @@
|
|
2 |
license: apache-2.0
|
3 |
library_name: peft
|
4 |
tags:
|
5 |
-
- unsloth
|
6 |
- llama-factory
|
|
|
|
|
7 |
- generated_from_trainer
|
8 |
base_model: unsloth/llama-3-8b-Instruct-bnb-4bit
|
9 |
model-index:
|
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
16 |
|
17 |
# llama-3-8b-finetuned
|
18 |
|
19 |
-
This model is a fine-tuned version of [unsloth/llama-3-8b-Instruct-bnb-4bit](https://huggingface.co/unsloth/llama-3-8b-Instruct-bnb-4bit) on
|
20 |
|
21 |
## Model description
|
22 |
|
|
|
2 |
license: apache-2.0
|
3 |
library_name: peft
|
4 |
tags:
|
|
|
5 |
- llama-factory
|
6 |
+
- lora
|
7 |
+
- unsloth
|
8 |
- generated_from_trainer
|
9 |
base_model: unsloth/llama-3-8b-Instruct-bnb-4bit
|
10 |
model-index:
|
|
|
17 |
|
18 |
# llama-3-8b-finetuned
|
19 |
|
20 |
+
This model is a fine-tuned version of [unsloth/llama-3-8b-Instruct-bnb-4bit](https://huggingface.co/unsloth/llama-3-8b-Instruct-bnb-4bit) on the formatted_data dataset.
|
21 |
|
22 |
## Model description
|
23 |
|
all_results.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"epoch": 2.0,
|
3 |
"total_flos": 1.5362943036162048e+16,
|
4 |
"train_loss": 0.16884834933280946,
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples_per_second": 0.
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
2 |
"epoch": 2.0,
|
3 |
"total_flos": 1.5362943036162048e+16,
|
4 |
"train_loss": 0.16884834933280946,
|
5 |
+
"train_runtime": 1424.4252,
|
6 |
+
"train_samples_per_second": 0.702,
|
7 |
+
"train_steps_per_second": 0.176
|
8 |
}
|
train_results.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"epoch": 2.0,
|
3 |
"total_flos": 1.5362943036162048e+16,
|
4 |
"train_loss": 0.16884834933280946,
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples_per_second": 0.
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
2 |
"epoch": 2.0,
|
3 |
"total_flos": 1.5362943036162048e+16,
|
4 |
"train_loss": 0.16884834933280946,
|
5 |
+
"train_runtime": 1424.4252,
|
6 |
+
"train_samples_per_second": 0.702,
|
7 |
+
"train_steps_per_second": 0.176
|
8 |
}
|
trainer_state.json
CHANGED
@@ -188,9 +188,9 @@
|
|
188 |
"step": 250,
|
189 |
"total_flos": 1.5362943036162048e+16,
|
190 |
"train_loss": 0.16884834933280946,
|
191 |
-
"train_runtime":
|
192 |
-
"train_samples_per_second": 0.
|
193 |
-
"train_steps_per_second": 0.
|
194 |
}
|
195 |
],
|
196 |
"logging_steps": 10,
|
|
|
188 |
"step": 250,
|
189 |
"total_flos": 1.5362943036162048e+16,
|
190 |
"train_loss": 0.16884834933280946,
|
191 |
+
"train_runtime": 1424.4252,
|
192 |
+
"train_samples_per_second": 0.702,
|
193 |
+
"train_steps_per_second": 0.176
|
194 |
}
|
195 |
],
|
196 |
"logging_steps": 10,
|