weijie210 committed on
Commit
a0b8529
·
verified ·
1 Parent(s): 0e2fea2

Model save

Browse files
README.md CHANGED
@@ -17,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0170
21
- - Rewards/chosen: -4.2758
22
- - Rewards/rejected: -26.6670
23
- - Rewards/accuracies: 0.9867
24
- - Rewards/margins: 22.3912
25
- - Logps/rejected: -355.0656
26
- - Logps/chosen: -199.5902
27
- - Logits/rejected: -1.5491
28
- - Logits/chosen: -2.4099
29
 
30
  ## Model description
31
 
@@ -61,30 +61,8 @@ The following hyperparameters were used during training:
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
- | 0.1657 | 0.08 | 100 | 0.1740 | -1.4537 | -4.1705 | 0.9801 | 2.7168 | -130.1008 | -171.3696 | -2.5952 | -2.7207 |
65
- | 0.0782 | 0.16 | 200 | 0.0627 | -0.4404 | -5.7033 | 0.9867 | 5.2629 | -145.4286 | -161.2361 | -2.4357 | -2.5861 |
66
- | 0.0324 | 0.24 | 300 | 0.0367 | -0.9306 | -8.1206 | 0.9886 | 7.1900 | -169.6023 | -166.1387 | -2.3643 | -2.5461 |
67
- | 0.0373 | 0.32 | 400 | 0.0352 | -0.3792 | -7.3599 | 0.9905 | 6.9807 | -161.9944 | -160.6242 | -2.3352 | -2.5608 |
68
- | 0.0151 | 0.4 | 500 | 0.0259 | -2.0957 | -11.1375 | 0.9896 | 9.0418 | -199.7707 | -177.7890 | -2.3514 | -2.5164 |
69
- | 0.0227 | 0.48 | 600 | 0.0228 | -1.5779 | -10.9393 | 0.9905 | 9.3614 | -197.7892 | -172.6115 | -2.2954 | -2.5141 |
70
- | 0.0225 | 0.57 | 700 | 0.0276 | -1.8300 | -10.5178 | 0.9886 | 8.6878 | -193.5734 | -175.1324 | -2.4148 | -2.6502 |
71
- | 0.0251 | 0.65 | 800 | 0.0296 | -4.7354 | -16.6488 | 0.9867 | 11.9133 | -254.8836 | -204.1870 | -2.0531 | -2.4265 |
72
- | 0.0197 | 0.73 | 900 | 0.0184 | -5.0699 | -18.4648 | 0.9924 | 13.3949 | -273.0435 | -207.5311 | -1.6415 | -2.3304 |
73
- | 0.0087 | 0.81 | 1000 | 0.0189 | -3.2908 | -19.6316 | 0.9905 | 16.3408 | -284.7117 | -189.7405 | -1.5669 | -2.2844 |
74
- | 0.025 | 0.89 | 1100 | 0.0161 | -4.9723 | -19.8966 | 0.9924 | 14.9243 | -287.3613 | -206.5555 | -1.9588 | -2.4809 |
75
- | 0.0299 | 0.97 | 1200 | 0.0192 | -7.1736 | -21.2381 | 0.9915 | 14.0645 | -300.7769 | -228.5686 | -1.7745 | -2.3342 |
76
- | 0.0014 | 1.05 | 1300 | 0.0184 | -3.8048 | -21.7578 | 0.9915 | 17.9530 | -305.9737 | -194.8804 | -1.6148 | -2.3594 |
77
- | 0.0015 | 1.13 | 1400 | 0.0153 | -5.9221 | -23.9435 | 0.9905 | 18.0214 | -327.8304 | -216.0534 | -1.6351 | -2.3309 |
78
- | 0.0019 | 1.21 | 1500 | 0.0166 | -3.5050 | -23.2207 | 0.9905 | 19.7157 | -320.6025 | -191.8821 | -1.5712 | -2.4214 |
79
- | 0.0055 | 1.29 | 1600 | 0.0150 | -4.9182 | -24.9562 | 0.9896 | 20.0380 | -337.9579 | -206.0149 | -1.0331 | -2.2401 |
80
- | 0.0019 | 1.37 | 1700 | 0.0151 | -5.7779 | -25.7925 | 0.9896 | 20.0146 | -346.3207 | -214.6113 | -0.9177 | -2.1887 |
81
- | 0.0009 | 1.45 | 1800 | 0.0174 | -4.9655 | -26.7260 | 0.9886 | 21.7605 | -355.6559 | -206.4873 | -0.4171 | -1.9919 |
82
- | 0.0086 | 1.53 | 1900 | 0.0159 | -6.3223 | -27.5136 | 0.9886 | 21.1913 | -363.5322 | -220.0559 | -0.7783 | -2.1291 |
83
- | 0.0009 | 1.62 | 2000 | 0.0148 | -4.9109 | -24.0100 | 0.9905 | 19.0991 | -328.4960 | -205.9413 | -1.6403 | -2.3910 |
84
- | 0.0015 | 1.7 | 2100 | 0.0152 | -4.5612 | -25.2891 | 0.9886 | 20.7279 | -341.2871 | -202.4446 | -1.5789 | -2.3995 |
85
- | 0.0028 | 1.78 | 2200 | 0.0174 | -4.5140 | -26.8924 | 0.9877 | 22.3784 | -357.3196 | -201.9725 | -1.3680 | -2.3331 |
86
- | 0.0018 | 1.86 | 2300 | 0.0166 | -4.5053 | -26.4789 | 0.9867 | 21.9736 | -353.1845 | -201.8854 | -1.5207 | -2.3959 |
87
- | 0.0025 | 1.94 | 2400 | 0.0171 | -4.5227 | -27.0564 | 0.9867 | 22.5337 | -358.9594 | -202.0593 | -1.4889 | -2.3903 |
88
 
89
 
90
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.0241
21
+ - Rewards/chosen: -1.3913
22
+ - Rewards/rejected: -26.2733
23
+ - Rewards/accuracies: 0.9940
24
+ - Rewards/margins: 24.8821
25
+ - Logps/rejected: -348.4335
26
+ - Logps/chosen: -162.1273
27
+ - Logits/rejected: -2.3713
28
+ - Logits/chosen: -2.5987
29
 
30
  ## Model description
31
 
 
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
+ | 0.0268 | 0.72 | 500 | 0.0314 | -0.9700 | -9.7922 | 0.9960 | 8.8222 | -183.6220 | -157.9143 | -2.2332 | -2.4086 |
65
+ | 0.0022 | 1.44 | 1000 | 0.0229 | -1.6108 | -24.5526 | 0.9960 | 22.9418 | -331.2263 | -164.3232 | -2.4361 | -2.6330 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
 
68
  ### Framework versions
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_logits/chosen": -2.409858465194702,
4
- "eval_logits/rejected": -1.5491485595703125,
5
- "eval_logps/chosen": -199.59024047851562,
6
- "eval_logps/rejected": -355.0655517578125,
7
- "eval_loss": 0.01704823225736618,
8
- "eval_rewards/accuracies": 0.9867424368858337,
9
- "eval_rewards/chosen": -4.275777816772461,
10
- "eval_rewards/margins": 22.391204833984375,
11
- "eval_rewards/rejected": -26.666980743408203,
12
- "eval_runtime": 1260.81,
13
- "eval_samples": 4194,
14
- "eval_samples_per_second": 3.326,
15
- "eval_steps_per_second": 0.105,
16
- "train_loss": 0.034544974909203885,
17
- "train_runtime": 47563.1126,
18
- "train_samples": 19794,
19
- "train_samples_per_second": 0.832,
20
- "train_steps_per_second": 0.052
21
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_logits/chosen": -2.5986759662628174,
4
+ "eval_logits/rejected": -2.371307134628296,
5
+ "eval_logps/chosen": -162.12734985351562,
6
+ "eval_logps/rejected": -348.43353271484375,
7
+ "eval_loss": 0.024077776819467545,
8
+ "eval_rewards/accuracies": 0.9940476417541504,
9
+ "eval_rewards/chosen": -1.3912619352340698,
10
+ "eval_rewards/margins": 24.882064819335938,
11
+ "eval_rewards/rejected": -26.27332878112793,
12
+ "eval_runtime": 908.8237,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 2.201,
15
+ "eval_steps_per_second": 0.069,
16
+ "train_loss": 0.04402416471998484,
17
+ "train_runtime": 16535.8816,
18
+ "train_samples": 11091,
19
+ "train_samples_per_second": 1.341,
20
+ "train_steps_per_second": 0.084
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_logits/chosen": -2.409858465194702,
4
- "eval_logits/rejected": -1.5491485595703125,
5
- "eval_logps/chosen": -199.59024047851562,
6
- "eval_logps/rejected": -355.0655517578125,
7
- "eval_loss": 0.01704823225736618,
8
- "eval_rewards/accuracies": 0.9867424368858337,
9
- "eval_rewards/chosen": -4.275777816772461,
10
- "eval_rewards/margins": 22.391204833984375,
11
- "eval_rewards/rejected": -26.666980743408203,
12
- "eval_runtime": 1260.81,
13
- "eval_samples": 4194,
14
- "eval_samples_per_second": 3.326,
15
- "eval_steps_per_second": 0.105
16
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_logits/chosen": -2.5986759662628174,
4
+ "eval_logits/rejected": -2.371307134628296,
5
+ "eval_logps/chosen": -162.12734985351562,
6
+ "eval_logps/rejected": -348.43353271484375,
7
+ "eval_loss": 0.024077776819467545,
8
+ "eval_rewards/accuracies": 0.9940476417541504,
9
+ "eval_rewards/chosen": -1.3912619352340698,
10
+ "eval_rewards/margins": 24.882064819335938,
11
+ "eval_rewards/rejected": -26.27332878112793,
12
+ "eval_runtime": 908.8237,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 2.201,
15
+ "eval_steps_per_second": 0.069
16
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:797ce6909e405f768ad9e4d7d0dffab8f4cd1465b43ef4e9f992163b0dd3c41d
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:311f0ae6fb60963e9e1041bbdc07c5c3e1e531d1a30ae23f11f6b5aa5e175b80
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96d63555a75ecfcb7394ba986282a5a301e4ff1e94dc57cf06b02a33125418a5
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df859f7d4ece46c79afb3a10b87508a2ddd673e18b249e3fecb61f5d303d2bc8
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d77f15c300a7ae9e7c61d33f79535887416eb8bcf7962e107f404c8493fdc24e
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c2a993ceebaaef856e5e0620fbea210b968ad7638cb94767bd33dc7fa947f7
3
  size 4540516344
runs/Jan30_08-23-58_node01/events.out.tfevents.1706574583.node01.2673416.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0c58c5e7b7206b30e4f09260e4eb25a633bbacd045487925d5c1653a8a0906
3
+ size 5667
runs/Jan30_08-32-51_node01/events.out.tfevents.1706574838.node01.2675529.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f1c71f90b702c8c444e57d9e3c3a531bbb9e7abdcc5125ef059c8bb094fefca
3
+ size 94238
runs/Jan30_08-32-51_node01/events.out.tfevents.1706592282.node01.2675529.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92f73fac3b8a77b32d3a34cfc8e99ac8ba711875787ff1fdad0fc067855c22dc
3
+ size 828
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "train_loss": 0.034544974909203885,
4
- "train_runtime": 47563.1126,
5
- "train_samples": 19794,
6
- "train_samples_per_second": 0.832,
7
- "train_steps_per_second": 0.052
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "train_loss": 0.04402416471998484,
4
+ "train_runtime": 16535.8816,
5
+ "train_samples": 11091,
6
+ "train_samples_per_second": 1.341,
7
+ "train_steps_per_second": 0.084
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:565022c5aa4fdb89913e86c5ae03d628ea722ee201f1f87b9d93dfd071a6b063
3
  size 5307
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cab8bde6dcfad3dd89a302fa59f2911975e6035429fe94620292ef71e92606e
3
  size 5307