Model save
- README.md +11 -33
- all_results.json +18 -18
- eval_results.json +13 -13
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Jan30_08-23-58_node01/events.out.tfevents.1706574583.node01.2673416.0 +3 -0
- runs/Jan30_08-32-51_node01/events.out.tfevents.1706574838.node01.2675529.0 +3 -0
- runs/Jan30_08-32-51_node01/events.out.tfevents.1706592282.node01.2675529.1 +3 -0
- train_results.json +5 -5
- trainer_state.json +0 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -17,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
-- Rewards/chosen: -
-- Rewards/rejected: -26.
-- Rewards/accuracies: 0.
-- Rewards/margins:
-- Logps/rejected: -
-- Logps/chosen: -
-- Logits/rejected: -
-- Logits/chosen: -2.
+- Loss: 0.0241
+- Rewards/chosen: -1.3913
+- Rewards/rejected: -26.2733
+- Rewards/accuracies: 0.9940
+- Rewards/margins: 24.8821
+- Logps/rejected: -348.4335
+- Logps/chosen: -162.1273
+- Logits/rejected: -2.3713
+- Logits/chosen: -2.5987
 
 ## Model description
 
@@ -61,30 +61,8 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.
-| 0.
-| 0.0324 | 0.24 | 300 | 0.0367 | -0.9306 | -8.1206 | 0.9886 | 7.1900 | -169.6023 | -166.1387 | -2.3643 | -2.5461 |
-| 0.0373 | 0.32 | 400 | 0.0352 | -0.3792 | -7.3599 | 0.9905 | 6.9807 | -161.9944 | -160.6242 | -2.3352 | -2.5608 |
-| 0.0151 | 0.4 | 500 | 0.0259 | -2.0957 | -11.1375 | 0.9896 | 9.0418 | -199.7707 | -177.7890 | -2.3514 | -2.5164 |
-| 0.0227 | 0.48 | 600 | 0.0228 | -1.5779 | -10.9393 | 0.9905 | 9.3614 | -197.7892 | -172.6115 | -2.2954 | -2.5141 |
-| 0.0225 | 0.57 | 700 | 0.0276 | -1.8300 | -10.5178 | 0.9886 | 8.6878 | -193.5734 | -175.1324 | -2.4148 | -2.6502 |
-| 0.0251 | 0.65 | 800 | 0.0296 | -4.7354 | -16.6488 | 0.9867 | 11.9133 | -254.8836 | -204.1870 | -2.0531 | -2.4265 |
-| 0.0197 | 0.73 | 900 | 0.0184 | -5.0699 | -18.4648 | 0.9924 | 13.3949 | -273.0435 | -207.5311 | -1.6415 | -2.3304 |
-| 0.0087 | 0.81 | 1000 | 0.0189 | -3.2908 | -19.6316 | 0.9905 | 16.3408 | -284.7117 | -189.7405 | -1.5669 | -2.2844 |
-| 0.025 | 0.89 | 1100 | 0.0161 | -4.9723 | -19.8966 | 0.9924 | 14.9243 | -287.3613 | -206.5555 | -1.9588 | -2.4809 |
-| 0.0299 | 0.97 | 1200 | 0.0192 | -7.1736 | -21.2381 | 0.9915 | 14.0645 | -300.7769 | -228.5686 | -1.7745 | -2.3342 |
-| 0.0014 | 1.05 | 1300 | 0.0184 | -3.8048 | -21.7578 | 0.9915 | 17.9530 | -305.9737 | -194.8804 | -1.6148 | -2.3594 |
-| 0.0015 | 1.13 | 1400 | 0.0153 | -5.9221 | -23.9435 | 0.9905 | 18.0214 | -327.8304 | -216.0534 | -1.6351 | -2.3309 |
-| 0.0019 | 1.21 | 1500 | 0.0166 | -3.5050 | -23.2207 | 0.9905 | 19.7157 | -320.6025 | -191.8821 | -1.5712 | -2.4214 |
-| 0.0055 | 1.29 | 1600 | 0.0150 | -4.9182 | -24.9562 | 0.9896 | 20.0380 | -337.9579 | -206.0149 | -1.0331 | -2.2401 |
-| 0.0019 | 1.37 | 1700 | 0.0151 | -5.7779 | -25.7925 | 0.9896 | 20.0146 | -346.3207 | -214.6113 | -0.9177 | -2.1887 |
-| 0.0009 | 1.45 | 1800 | 0.0174 | -4.9655 | -26.7260 | 0.9886 | 21.7605 | -355.6559 | -206.4873 | -0.4171 | -1.9919 |
-| 0.0086 | 1.53 | 1900 | 0.0159 | -6.3223 | -27.5136 | 0.9886 | 21.1913 | -363.5322 | -220.0559 | -0.7783 | -2.1291 |
-| 0.0009 | 1.62 | 2000 | 0.0148 | -4.9109 | -24.0100 | 0.9905 | 19.0991 | -328.4960 | -205.9413 | -1.6403 | -2.3910 |
-| 0.0015 | 1.7 | 2100 | 0.0152 | -4.5612 | -25.2891 | 0.9886 | 20.7279 | -341.2871 | -202.4446 | -1.5789 | -2.3995 |
-| 0.0028 | 1.78 | 2200 | 0.0174 | -4.5140 | -26.8924 | 0.9877 | 22.3784 | -357.3196 | -201.9725 | -1.3680 | -2.3331 |
-| 0.0018 | 1.86 | 2300 | 0.0166 | -4.5053 | -26.4789 | 0.9867 | 21.9736 | -353.1845 | -201.8854 | -1.5207 | -2.3959 |
-| 0.0025 | 1.94 | 2400 | 0.0171 | -4.5227 | -27.0564 | 0.9867 | 22.5337 | -358.9594 | -202.0593 | -1.4889 | -2.3903 |
+| 0.0268 | 0.72 | 500 | 0.0314 | -0.9700 | -9.7922 | 0.9960 | 8.8222 | -183.6220 | -157.9143 | -2.2332 | -2.4086 |
+| 0.0022 | 1.44 | 1000 | 0.0229 | -1.6108 | -24.5526 | 0.9960 | 22.9418 | -331.2263 | -164.3232 | -2.4361 | -2.6330 |
 
 
 ### Framework versions
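The reward columns in the card follow the naming used by TRL-style preference (DPO) training: Rewards/margins is the difference between the chosen and rejected rewards, and Rewards/accuracies is the fraction of pairs where the chosen completion scores higher. Below is a minimal consistency check using the values copied from the evaluation bullets above; the DPO interpretation is an assumption, since the diff itself does not name the trainer.

```python
# Consistency check for the evaluation summary above (values copied verbatim).
# Assumption: "Rewards/margins" is mean(chosen_reward - rejected_reward),
# as in TRL-style DPO logging.
rewards_chosen = -1.3913
rewards_rejected = -26.2733
reported_margin = 24.8821

computed_margin = rewards_chosen - rewards_rejected
print(f"chosen - rejected = {computed_margin:.4f}")  # 24.8820, matches to rounding
assert abs(computed_margin - reported_margin) < 1e-3
```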
all_results.json
CHANGED
@@ -1,21 +1,21 @@
 {
     "epoch": 2.0,
-    "eval_logits/chosen": -2.
-    "eval_logits/rejected": -
-    "eval_logps/chosen": -
-    "eval_logps/rejected": -
-    "eval_loss": 0.
-    "eval_rewards/accuracies": 0.
-    "eval_rewards/chosen": -
-    "eval_rewards/margins":
-    "eval_rewards/rejected": -26.
-    "eval_runtime":
-    "eval_samples":
-    "eval_samples_per_second":
-    "eval_steps_per_second": 0.
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples":
-    "train_samples_per_second":
-    "train_steps_per_second": 0.
+    "eval_logits/chosen": -2.5986759662628174,
+    "eval_logits/rejected": -2.371307134628296,
+    "eval_logps/chosen": -162.12734985351562,
+    "eval_logps/rejected": -348.43353271484375,
+    "eval_loss": 0.024077776819467545,
+    "eval_rewards/accuracies": 0.9940476417541504,
+    "eval_rewards/chosen": -1.3912619352340698,
+    "eval_rewards/margins": 24.882064819335938,
+    "eval_rewards/rejected": -26.27332878112793,
+    "eval_runtime": 908.8237,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 2.201,
+    "eval_steps_per_second": 0.069,
+    "train_loss": 0.04402416471998484,
+    "train_runtime": 16535.8816,
+    "train_samples": 11091,
+    "train_samples_per_second": 1.341,
+    "train_steps_per_second": 0.084
 }
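The throughput fields in all_results.json can be re-derived from the raw counts: eval_samples_per_second is eval_samples / eval_runtime (2000 / 908.82 ≈ 2.201), and train_samples_per_second is epoch × train_samples / train_runtime (2 × 11091 / 16535.88 ≈ 1.341). A small sketch, assuming the file is read from the repository root:

```python
import json

# Re-derive the reported throughput from the raw counts in all_results.json.
with open("all_results.json") as f:
    results = json.load(f)

eval_sps = results["eval_samples"] / results["eval_runtime"]
train_sps = results["epoch"] * results["train_samples"] / results["train_runtime"]

print(f"eval samples/s:  {eval_sps:.3f}")   # ~2.201, matches eval_samples_per_second
print(f"train samples/s: {train_sps:.3f}")  # ~1.341, matches train_samples_per_second
```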
eval_results.json
CHANGED
@@ -1,16 +1,16 @@
 {
     "epoch": 2.0,
-    "eval_logits/chosen": -2.
-    "eval_logits/rejected": -
-    "eval_logps/chosen": -
-    "eval_logps/rejected": -
-    "eval_loss": 0.
-    "eval_rewards/accuracies": 0.
-    "eval_rewards/chosen": -
-    "eval_rewards/margins":
-    "eval_rewards/rejected": -26.
-    "eval_runtime":
-    "eval_samples":
-    "eval_samples_per_second":
-    "eval_steps_per_second": 0.
+    "eval_logits/chosen": -2.5986759662628174,
+    "eval_logits/rejected": -2.371307134628296,
+    "eval_logps/chosen": -162.12734985351562,
+    "eval_logps/rejected": -348.43353271484375,
+    "eval_loss": 0.024077776819467545,
+    "eval_rewards/accuracies": 0.9940476417541504,
+    "eval_rewards/chosen": -1.3912619352340698,
+    "eval_rewards/margins": 24.882064819335938,
+    "eval_rewards/rejected": -26.27332878112793,
+    "eval_runtime": 908.8237,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 2.201,
+    "eval_steps_per_second": 0.069
 }
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:311f0ae6fb60963e9e1041bbdc07c5c3e1e531d1a30ae23f11f6b5aa5e175b80
 size 4943162336
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:df859f7d4ece46c79afb3a10b87508a2ddd673e18b249e3fecb61f5d303d2bc8
 size 4999819336
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:44c2a993ceebaaef856e5e0620fbea210b968ad7638cb94767bd33dc7fa947f7
 size 4540516344
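The three .safetensors entries above are Git LFS pointer files: the repository stores only the spec version line, the sha256 oid, and the byte size, while the actual shard content lives in LFS storage. A hedged sketch for verifying a locally downloaded shard against the oid recorded in the pointer (the local filename is an assumption about where the resolved file sits):

```python
import hashlib

# Verify a downloaded shard against the LFS pointer's sha256 oid shown above.
# "model-00001-of-00003.safetensors" is assumed to be the resolved file on
# disk, not the 3-line pointer stub itself.
expected = "311f0ae6fb60963e9e1041bbdc07c5c3e1e531d1a30ae23f11f6b5aa5e175b80"

h = hashlib.sha256()
with open("model-00001-of-00003.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(h.hexdigest() == expected)  # True if the shard matches the pointer's oid
```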
runs/Jan30_08-23-58_node01/events.out.tfevents.1706574583.node01.2673416.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b0c58c5e7b7206b30e4f09260e4eb25a633bbacd045487925d5c1653a8a0906
+size 5667
runs/Jan30_08-32-51_node01/events.out.tfevents.1706574838.node01.2675529.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f1c71f90b702c8c444e57d9e3c3a531bbb9e7abdcc5125ef059c8bb094fefca
+size 94238
runs/Jan30_08-32-51_node01/events.out.tfevents.1706592282.node01.2675529.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92f73fac3b8a77b32d3a34cfc8e99ac8ba711875787ff1fdad0fc067855c22dc
+size 828
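The three files added under runs/ are TensorBoard event logs from the training job, also committed as LFS pointers. A sketch of reading one run back with TensorBoard's EventAccumulator, assuming the LFS content has been resolved locally and tensorboard is installed; the scalar tag names are not visible in this diff:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Load the event files from one run directory and list the logged scalar tags.
accumulator = EventAccumulator("runs/Jan30_08-32-51_node01")
accumulator.Reload()

scalar_tags = accumulator.Tags()["scalars"]
print(scalar_tags)  # tag names depend on the trainer's logging configuration

# Print (step, value) pairs for the first logged scalar, if any.
if scalar_tags:
    for event in accumulator.Scalars(scalar_tags[0]):
        print(event.step, event.value)
```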
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 2.0,
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples":
-    "train_samples_per_second":
-    "train_steps_per_second": 0.
+    "train_loss": 0.04402416471998484,
+    "train_runtime": 16535.8816,
+    "train_samples": 11091,
+    "train_samples_per_second": 1.341,
+    "train_steps_per_second": 0.084
 }
trainer_state.json
CHANGED
The diff for this file is too large to render. See raw diff.
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0cab8bde6dcfad3dd89a302fa59f2911975e6035429fe94620292ef71e92606e
 size 5307
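training_args.bin is the serialized argument object that the Hugging Face Trainer writes alongside a run; it can be loaded back with torch.load for inspection. A minimal sketch, assuming the file was produced by transformers' Trainer and that you trust its contents (it is a pickle, not a tensor checkpoint):

```python
import torch

# Restore the pickled training-arguments object for inspection.
# weights_only=False is needed on newer torch versions because the file
# contains arbitrary pickled Python objects, not just tensors.
args = torch.load("training_args.bin", weights_only=False)

print(type(args).__name__)               # typically TrainingArguments (or a subclass)
print(args.learning_rate)
print(args.num_train_epochs)
print(args.per_device_train_batch_size)
```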