Model save

- README.md +1 -1
- adapter_config.json +3 -3
- all_results.json +4 -4
- train_results.json +4 -4
- trainer_state.json +4 -4
- training_args.bin +1 -1
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/felipealumni-usp/huggingface/runs/
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/felipealumni-usp/huggingface/runs/9ucqxz8g)
 
 This model was trained with SFT.
 
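The hunk header above shows only the tail of the card's usage snippet (`print(output["generated_text"])`). For context, a minimal usage sketch in the style of TRL-generated model cards; the repo id is a placeholder, since the actual id is not visible in this diff:

```python
# Minimal usage sketch; "<user>/<model-repo>" is a placeholder, not the actual repo id.
from transformers import pipeline

generator = pipeline("text-generation", model="<user>/<model-repo>", device_map="auto")
output = generator(
    [{"role": "user", "content": "Summarize what supervised fine-tuning does."}],
    max_new_tokens=128,
    return_full_text=False,
)[0]
print(output["generated_text"])
```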
adapter_config.json CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
+    "k_proj",
+    "o_proj",
     "down_proj",
     "up_proj",
-    "o_proj",
     "v_proj",
     "q_proj",
-    "
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
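The change rewrites the LoRA `target_modules` list so that both the attention projections (q/k/v/o) and the MLP projections (gate/up/down) are adapted, which is the usual way to apply LoRA to every linear layer of a Llama-style decoder block. A sketch of the corresponding `peft.LoraConfig`, assuming illustrative values for `r` and `lora_alpha` (neither appears in the changed hunk):

```python
# Sketch of a LoraConfig matching the updated target_modules.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,           # assumed for illustration; not shown in this diff
    lora_alpha=32,  # assumed for illustration; not shown in this diff
    target_modules=[
        "k_proj",
        "o_proj",
        "down_proj",
        "up_proj",
        "v_proj",
        "q_proj",
        "gate_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
)
```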
all_results.json CHANGED
@@ -2,9 +2,9 @@
   "epoch": 0.9992542878448919,
   "eval_samples": 26,
   "total_flos": 7.44129634982953e+16,
-  "train_loss": 0.
-  "train_runtime": 
+  "train_loss": 0.0,
+  "train_runtime": 12.907,
   "train_samples": 25295,
-  "train_samples_per_second": 
-  "train_steps_per_second": 
+  "train_samples_per_second": 415.356,
+  "train_steps_per_second": 51.91
 }
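The updated throughput numbers are internally consistent: `train_steps_per_second * train_runtime` recovers the 670 steps recorded in `trainer_state.json`, and the ratio of samples per second to steps per second is roughly 8 samples per optimizer step. A small sketch that checks this from the file (`train_results.json` below carries the same fields):

```python
# Sketch: read the reported metrics and sanity-check them against each other.
import json

with open("all_results.json") as f:
    results = json.load(f)

# steps_per_second * runtime ~ total optimizer steps (670 here);
# samples_per_second / steps_per_second ~ samples per optimizer step (~8 here).
steps = results["train_steps_per_second"] * results["train_runtime"]
samples_per_step = results["train_samples_per_second"] / results["train_steps_per_second"]
print(f"{steps:.0f} steps, ~{samples_per_step:.0f} samples per optimizer step")
```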
train_results.json CHANGED
@@ -2,9 +2,9 @@
   "epoch": 0.9992542878448919,
   "eval_samples": 26,
   "total_flos": 7.44129634982953e+16,
-  "train_loss": 0.
-  "train_runtime": 
+  "train_loss": 0.0,
+  "train_runtime": 12.907,
   "train_samples": 25295,
-  "train_samples_per_second": 
-  "train_steps_per_second": 
+  "train_samples_per_second": 415.356,
+  "train_steps_per_second": 51.91
 }
trainer_state.json CHANGED
@@ -402,10 +402,10 @@
       "epoch": 0.9992542878448919,
       "step": 670,
       "total_flos": 7.44129634982953e+16,
-      "train_loss": 0.
-      "train_runtime": 
-      "train_samples_per_second": 
-      "train_steps_per_second": 
+      "train_loss": 0.0,
+      "train_runtime": 12.907,
+      "train_samples_per_second": 415.356,
+      "train_steps_per_second": 51.91
     }
   ],
   "logging_steps": 25,
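Besides the final summary entry changed here, `trainer_state.json` keeps the full `log_history` (one entry every `logging_steps` = 25 steps). A sketch for pulling the logged loss curve out of it:

```python
# Sketch: extract the per-step training loss from trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Intermediate log entries carry "loss"; the final summary entry carries "train_loss" instead.
for entry in state["log_history"]:
    if "loss" in entry:
        print(entry["step"], entry["loss"])
```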
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:77325f7c9b3efc0699af0bfa2fcc5b4625dbb28ecb6d80c9ebd0d5972803c47f
 size 6904
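`training_args.bin` is stored as a Git LFS pointer, so only its SHA-256 and size appear in the diff. After fetching the real file (e.g. with `git lfs pull`), it can be inspected as the pickled training arguments that `Trainer` writes alongside checkpoints; a sketch under that assumption:

```python
# Sketch: inspect the hyperparameters stored in training_args.bin.
# Assumes the usual pickled TrainingArguments/SFTConfig written by transformers' Trainer,
# and that the class it was saved with is importable in this environment.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)
```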