underactuated
committed on
End of training
Browse files
- DPO/adapter_config.json +2 -2
- DPO/adapter_model.safetensors +1 -1
- README.md +14 -14
- reference/adapter_config.json +2 -2
- reference/adapter_model.safetensors +1 -1
- runs/Jun24_18-28-36_155-248-201-118/events.out.tfevents.1719253729.155-248-201-118.33150.0 +3 -0
- runs/Jun24_18-29-35_155-248-201-118/events.out.tfevents.1719253776.155-248-201-118.33828.0 +3 -0
- training_args.bin +1 -1
DPO/adapter_config.json
CHANGED
@@ -21,11 +21,11 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"down_proj",
|
24 |
-
"up_proj",
|
25 |
"v_proj",
|
26 |
"o_proj",
|
27 |
-
"k_proj",
|
28 |
"gate_proj",
|
|
|
|
|
29 |
"q_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"down_proj",
|
|
|
24 |
"v_proj",
|
25 |
"o_proj",
|
|
|
26 |
"gate_proj",
|
27 |
+
"up_proj",
|
28 |
+
"k_proj",
|
29 |
"q_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
DPO/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167832240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51b6441543012f61d5fac618e8936ebdf87fb01c69f120a14d955b0eb5082709
|
3 |
size 167832240
|
README.md
CHANGED
@@ -18,15 +18,15 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss: 0.
|
22 |
-
- Rewards/chosen: 0.
|
23 |
-
- Rewards/rejected: 0.
|
24 |
-
- Rewards/accuracies: 0.
|
25 |
-
- Rewards/margins: 0.
|
26 |
-
- Logps/rejected: -
|
27 |
-
- Logps/chosen: -
|
28 |
-
- Logits/rejected: -0.
|
29 |
-
- Logits/chosen: -0.
|
30 |
|
31 |
## Model description
|
32 |
|
@@ -60,11 +60,11 @@ The following hyperparameters were used during training:
|
|
60 |
|
61 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
62 |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
63 |
-
| 0.
|
64 |
-
| 0.6919 | 0.1763 | 40 | 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
|
69 |
|
70 |
### Framework versions
|
|
|
18 |
|
19 |
This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.6891
|
22 |
+
- Rewards/chosen: 0.0621
|
23 |
+
- Rewards/rejected: 0.0538
|
24 |
+
- Rewards/accuracies: 0.6213
|
25 |
+
- Rewards/margins: 0.0083
|
26 |
+
- Logps/rejected: -52.0979
|
27 |
+
- Logps/chosen: -55.7624
|
28 |
+
- Logits/rejected: -0.2991
|
29 |
+
- Logits/chosen: -0.3269
|
30 |
|
31 |
## Model description
|
32 |
|
|
|
60 |
|
61 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
62 |
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
63 |
+
| 0.6929 | 0.0882 | 20 | 0.6923 | 0.0239 | 0.0221 | 0.6109 | 0.0018 | -52.4151 | -56.1451 | -0.2993 | -0.3271 |
|
64 |
+
| 0.6919 | 0.1763 | 40 | 0.6908 | 0.0490 | 0.0443 | 0.6187 | 0.0047 | -52.1927 | -55.8934 | -0.2991 | -0.3268 |
|
65 |
+
| 0.6903 | 0.2645 | 60 | 0.6898 | 0.0587 | 0.0518 | 0.6157 | 0.0068 | -52.1173 | -55.7971 | -0.2991 | -0.3269 |
|
66 |
+
| 0.6899 | 0.3526 | 80 | 0.6892 | 0.0595 | 0.0515 | 0.6135 | 0.0081 | -52.1210 | -55.7885 | -0.2991 | -0.3269 |
|
67 |
+
| 0.6898 | 0.4408 | 100 | 0.6891 | 0.0621 | 0.0538 | 0.6213 | 0.0083 | -52.0979 | -55.7624 | -0.2991 | -0.3269 |
|
68 |
|
69 |
|
70 |
### Framework versions
|
reference/adapter_config.json
CHANGED
@@ -21,11 +21,11 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"down_proj",
|
24 |
-
"up_proj",
|
25 |
"v_proj",
|
26 |
"o_proj",
|
27 |
-
"k_proj",
|
28 |
"gate_proj",
|
|
|
|
|
29 |
"q_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"down_proj",
|
|
|
24 |
"v_proj",
|
25 |
"o_proj",
|
|
|
26 |
"gate_proj",
|
27 |
+
"up_proj",
|
28 |
+
"k_proj",
|
29 |
"q_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
reference/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167832240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76482a406513cef1b542263101e87b786e24bba632d3af9ec30ccef79a3ebedd
|
3 |
size 167832240
|
runs/Jun24_18-28-36_155-248-201-118/events.out.tfevents.1719253729.155-248-201-118.33150.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:839288b1b507137dac0f5c537a9ae8be443d11739a970609a95d41e0313adc0a
|
3 |
+
size 4184
|
runs/Jun24_18-29-35_155-248-201-118/events.out.tfevents.1719253776.155-248-201-118.33828.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:799a1c4fb98f1aec13a77de44cdb4ef70751ef3a6ad9fdf9a2600d9ca713e12c
|
3 |
+
size 13348
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5688
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6aabc3510676719d749ac928da99449e40a96661d751ee0838c5f43277a60184
|
3 |
size 5688
|