Model save

- README.md +10 -16
- all_results.json +4 -17
- config.json +1 -1
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- runs/Apr27_13-04-12_gcp002/events.out.tfevents.1714225073.gcp002.2844214.0 +3 -0
- train_results.json +4 -4
- trainer_state.json +118 -118
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,15 +2,9 @@
 license: other
 base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
 tags:
-- alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
-- trl
-- dpo
-- generated_from_trainer
-datasets:
-- argilla/dpo-mix-7k
 model-index:
 - name: zephyr-7b-gemma-dpo
   results: []
@@ -21,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # zephyr-7b-gemma-dpo
 
-This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on
+This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
-- Rewards/chosen: -
-- Rewards/rejected: -
+- Loss: 0.4692
+- Rewards/chosen: -2.9810
+- Rewards/rejected: -4.6451
 - Rewards/accuracies: 0.7604
-- Rewards/margins: 1.
-- Logps/rejected: -
-- Logps/chosen: -
-- Logits/rejected:
-- Logits/chosen: 96.
+- Rewards/margins: 1.6641
+- Logps/rejected: -453.0530
+- Logps/chosen: -422.7995
+- Logits/rejected: 91.0574
+- Logits/chosen: 96.7661
 
 ## Model description
 
@@ -68,7 +62,7 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.
+| 0.1591 | 1.8957 | 100 | 0.4692 | -2.9810 | -4.6451 | 0.7604 | 1.6641 | -453.0530 | -422.7995 | 91.0574 | 96.7661 |
 
 
 ### Framework versions
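Since the card only reports metrics, a minimal inference sketch may be useful. It assumes the standard transformers chat-template API; the repo id is illustrative and should be replaced with wherever this checkpoint is actually hosted:

```python
# Minimal inference sketch for the model described by this card (illustrative repo id).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "HuggingFaceH4/zephyr-7b-gemma-dpo"  # assumption: adjust to the actual repo
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

messages = [{"role": "user", "content": "Explain DPO in one sentence."}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
output = model.generate(input_ids, max_new_tokens=128, do_sample=False)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```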
all_results.json
CHANGED
@@ -1,22 +1,9 @@
 {
 "epoch": 1.971563981042654,
-"eval_logits/chosen": 96.0078125,
-"eval_logits/rejected": 90.1099853515625,
-"eval_logps/chosen": -434.6161193847656,
-"eval_logps/rejected": -466.3072814941406,
-"eval_loss": 0.4578173756599426,
-"eval_rewards/accuracies": 0.7604166865348816,
-"eval_rewards/chosen": -3.5473945140838623,
-"eval_rewards/margins": 1.6644223928451538,
-"eval_rewards/rejected": -5.211816787719727,
-"eval_runtime": 124.9704,
-"eval_samples": 750,
-"eval_samples_per_second": 6.001,
-"eval_steps_per_second": 0.192,
 "total_flos": 0.0,
-"train_loss": 0.
-"train_runtime":
+"train_loss": 0.3921648321243433,
+"train_runtime": 1190.3032,
 "train_samples": 6750,
-"train_samples_per_second":
-"train_steps_per_second": 0.
+"train_samples_per_second": 11.342,
+"train_steps_per_second": 0.087
 }
config.json
CHANGED
@@ -24,6 +24,6 @@
 "rope_theta": 10000.0,
 "torch_dtype": "bfloat16",
 "transformers_version": "4.40.1",
-"use_cache":
+"use_cache": false,
 "vocab_size": 256000
 }
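The only change here is use_cache, which training scripts commonly write out as false because the KV cache is disabled while training with gradient checkpointing (an assumption about this run); it does not affect the saved weights and can be re-enabled for generation:

```python
# Sketch: re-enable the KV cache when using the checkpoint for generation.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("HuggingFaceH4/zephyr-7b-gemma-dpo")  # illustrative repo id
config.use_cache = True  # speeds up autoregressive decoding; harmless to flip back on
```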
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7c5a7396241c9049d5aa615a0081a16f922ac27a595fad308da7be35b297a132
 size 4995496656
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cd5dd371acf66dbd26a13036c2b42efee4bf540edd178d9cb3a225eccd87d21c
 size 4982953168
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:836f574fd62d28f4d282b81d3712708535d3eadc45ddd0509c932611129327c5
 size 4982953200
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:85fb13399e2932fccc4212b3907e81ae00bcdf3dbbb785a234bceaa501d29091
 size 2113988336
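Each *.safetensors entry above is a Git LFS pointer, so the diff records only the new content hash and byte size, not the weights themselves. A minimal sketch of verifying a downloaded shard against its pointer (paths are illustrative):

```python
# Sketch: check a downloaded shard against the sha256 oid and size from its LFS pointer.
import hashlib
import os

def verify_shard(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file's size and sha256 digest match the LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

print(verify_shard(
    "model-00004-of-00004.safetensors",
    "85fb13399e2932fccc4212b3907e81ae00bcdf3dbbb785a234bceaa501d29091",
    2113988336,
))
```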
runs/Apr27_13-04-12_gcp002/events.out.tfevents.1714225073.gcp002.2844214.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52fbc293ec4d235cf3059606bc8b85f87a5b0758b3840ab0f43bccf065f0dd9c
+size 13440
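The new file under runs/ is the TensorBoard event log for this training run, also stored through LFS. A sketch of listing its scalar tags with TensorBoard's event reader, assuming the tensorboard package is installed and the file has been fetched locally:

```python
# Sketch: inspect the scalars logged in the new TensorBoard event file.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("runs/Apr27_13-04-12_gcp002")  # directory containing the events file
ea.Reload()
print(ea.Tags()["scalars"])  # exact tag names depend on what the trainer logged
```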
train_results.json
CHANGED
@@ -1,9 +1,9 @@
 {
 "epoch": 1.971563981042654,
 "total_flos": 0.0,
-"train_loss": 0.
-"train_runtime":
+"train_loss": 0.3921648321243433,
+"train_runtime": 1190.3032,
 "train_samples": 6750,
-"train_samples_per_second":
-"train_steps_per_second": 0.
+"train_samples_per_second": 11.342,
+"train_steps_per_second": 0.087
 }
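The new throughput figures are self-consistent with the sample count, step count, and runtime, assuming the run was configured for 2 epochs (the final logged epoch is ~1.97 over 104 steps):

```python
# Sanity check: derive the throughput values written to train_results.json.
train_samples = 6750
train_runtime = 1190.3032  # seconds
num_epochs = 2             # assumed from the ~1.97 final epoch
total_steps = 104          # final "step" in trainer_state.json

print(round(train_samples * num_epochs / train_runtime, 3))  # ~11.342 samples/sec
print(round(total_steps / train_runtime, 3))                 # ~0.087 steps/sec
```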
trainer_state.json
CHANGED
@@ -10,12 +10,12 @@
 "log_history": [
 {
 "epoch": 0.018957345971563982,
-"grad_norm":
+"grad_norm": 133.64647421295854,
 "learning_rate": 4.545454545454545e-08,
-"logits/chosen": 117.
-"logits/rejected": 126.
-"logps/chosen": -
-"logps/rejected": -
+"logits/chosen": 117.4909439086914,
+"logits/rejected": 126.8502426147461,
+"logps/chosen": -338.3250732421875,
+"logps/rejected": -438.210205078125,
 "loss": 0.6931,
 "rewards/accuracies": 0.0,
 "rewards/chosen": 0.0,
@@ -25,178 +25,178 @@
 },
 {
 "epoch": 0.1895734597156398,
-"grad_norm":
+"grad_norm": 137.17714765050428,
 "learning_rate": 4.545454545454545e-07,
-"logits/chosen": 135.
-"logits/rejected": 138.
-"logps/chosen": -
-"logps/rejected": -439.
-"loss": 0.
-"rewards/accuracies": 0.
-"rewards/chosen": 0.
-"rewards/margins": 0.
-"rewards/rejected":
+"logits/chosen": 135.0139923095703,
+"logits/rejected": 138.361328125,
+"logps/chosen": -397.126220703125,
+"logps/rejected": -439.42083740234375,
+"loss": 0.7143,
+"rewards/accuracies": 0.3888888955116272,
+"rewards/chosen": -0.02168009988963604,
+"rewards/margins": -0.04445798695087433,
+"rewards/rejected": 0.02277788519859314,
 "step": 10
 },
 {
 "epoch": 0.3791469194312796,
-"grad_norm":
+"grad_norm": 125.18497680199994,
 "learning_rate": 4.885348141000122e-07,
-"logits/chosen": 121.
-"logits/rejected": 125.
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
-"rewards/accuracies": 0.
-"rewards/chosen": 0.
-"rewards/margins": 0.
-"rewards/rejected": -0.
+"logits/chosen": 121.53276062011719,
+"logits/rejected": 125.26307678222656,
+"logps/chosen": -370.91107177734375,
+"logps/rejected": -423.38677978515625,
+"loss": 0.6418,
+"rewards/accuracies": 0.5874999761581421,
+"rewards/chosen": 0.08632902055978775,
+"rewards/margins": 0.28587669134140015,
+"rewards/rejected": -0.199547678232193,
 "step": 20
 },
 {
 "epoch": 0.5687203791469194,
-"grad_norm":
+"grad_norm": 115.98151525592598,
 "learning_rate": 4.5025027361734613e-07,
-"logits/chosen": 142.
-"logits/rejected":
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
-"rewards/accuracies": 0.
-"rewards/chosen": -1.
-"rewards/margins": 0.
-"rewards/rejected": -
+"logits/chosen": 142.67178344726562,
+"logits/rejected": 136.16537475585938,
+"logps/chosen": -415.01104736328125,
+"logps/rejected": -460.3519592285156,
+"loss": 0.5673,
+"rewards/accuracies": 0.668749988079071,
+"rewards/chosen": -1.1033741235733032,
+"rewards/margins": 0.8893669843673706,
+"rewards/rejected": -1.9927412271499634,
 "step": 30
 },
 {
 "epoch": 0.7582938388625592,
-"grad_norm":
+"grad_norm": 112.04677171325864,
 "learning_rate": 3.893311157806091e-07,
-"logits/chosen": 124.
-"logits/rejected":
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
-"rewards/accuracies": 0.
-"rewards/chosen": -
-"rewards/margins": 1.
-"rewards/rejected": -
+"logits/chosen": 124.56459045410156,
+"logits/rejected": 113.08979797363281,
+"logps/chosen": -391.10174560546875,
+"logps/rejected": -417.09051513671875,
+"loss": 0.5586,
+"rewards/accuracies": 0.71875,
+"rewards/chosen": -1.8466203212738037,
+"rewards/margins": 1.0624934434890747,
+"rewards/rejected": -2.909113883972168,
 "step": 40
 },
 {
 "epoch": 0.9478672985781991,
-"grad_norm": 126.
+"grad_norm": 126.0267199667638,
 "learning_rate": 3.126631330646801e-07,
-"logits/chosen":
-"logits/rejected":
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
-"rewards/accuracies": 0.
-"rewards/chosen": -
-"rewards/margins": 1.
-"rewards/rejected": -3.
+"logits/chosen": 138.824462890625,
+"logits/rejected": 142.9259033203125,
+"logps/chosen": -455.6646423339844,
+"logps/rejected": -536.987548828125,
+"loss": 0.4941,
+"rewards/accuracies": 0.731249988079071,
+"rewards/chosen": -1.8371152877807617,
+"rewards/margins": 1.2058273553848267,
+"rewards/rejected": -3.042942523956299,
 "step": 50
 },
 {
 "epoch": 1.1374407582938388,
-"grad_norm":
+"grad_norm": 63.02753605606795,
 "learning_rate": 2.2891223348923882e-07,
-"logits/chosen": 131.
-"logits/rejected":
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
-"rewards/accuracies": 0.
-"rewards/chosen": -
-"rewards/margins": 2.
-"rewards/rejected": -4.
+"logits/chosen": 131.37802124023438,
+"logits/rejected": 134.72222900390625,
+"logps/chosen": -436.17047119140625,
+"logps/rejected": -520.2355346679688,
+"loss": 0.3078,
+"rewards/accuracies": 0.918749988079071,
+"rewards/chosen": -1.9126123189926147,
+"rewards/margins": 2.285891056060791,
+"rewards/rejected": -4.198503017425537,
 "step": 60
 },
 {
 "epoch": 1.3270142180094786,
-"grad_norm":
+"grad_norm": 59.15589622996558,
 "learning_rate": 1.4754491880085317e-07,
-"logits/chosen": 124.
-"logits/rejected": 126.
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
+"logits/chosen": 124.51689147949219,
+"logits/rejected": 126.70524597167969,
+"logps/chosen": -402.62066650390625,
+"logps/rejected": -505.50006103515625,
+"loss": 0.1932,
 "rewards/accuracies": 0.9312499761581421,
-"rewards/chosen": -
-"rewards/margins": 2.
-"rewards/rejected": -
+"rewards/chosen": -1.7549495697021484,
+"rewards/margins": 2.8932533264160156,
+"rewards/rejected": -4.648203372955322,
 "step": 70
 },
 {
 "epoch": 1.5165876777251186,
-"grad_norm":
+"grad_norm": 44.24206971141979,
 "learning_rate": 7.775827023107834e-08,
-"logits/chosen":
-"logits/rejected":
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
+"logits/chosen": 111.74947357177734,
+"logits/rejected": 128.5332489013672,
+"logps/chosen": -400.06146240234375,
+"logps/rejected": -519.4473876953125,
+"loss": 0.1687,
 "rewards/accuracies": 0.9624999761581421,
-"rewards/chosen": -
-"rewards/margins": 2.
-"rewards/rejected": -
+"rewards/chosen": -2.326984167098999,
+"rewards/margins": 2.937407970428467,
+"rewards/rejected": -5.264392375946045,
 "step": 80
 },
 {
 "epoch": 1.7061611374407581,
-"grad_norm":
+"grad_norm": 50.07584592888485,
 "learning_rate": 2.7440387297912122e-08,
-"logits/chosen": 110.
-"logits/rejected":
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
-"rewards/accuracies": 0.
-"rewards/chosen": -
-"rewards/margins": 3.
-"rewards/rejected": -
+"logits/chosen": 110.84814453125,
+"logits/rejected": 123.78230285644531,
+"logps/chosen": -435.03265380859375,
+"logps/rejected": -550.7723388671875,
+"loss": 0.1579,
+"rewards/accuracies": 0.9750000238418579,
+"rewards/chosen": -2.400338649749756,
+"rewards/margins": 3.238767623901367,
+"rewards/rejected": -5.639105796813965,
 "step": 90
 },
 {
 "epoch": 1.8957345971563981,
-"grad_norm":
+"grad_norm": 49.65552371508206,
 "learning_rate": 2.27878296044029e-09,
-"logits/chosen":
-"logits/rejected":
-"logps/chosen": -
-"logps/rejected": -
-"loss": 0.
-"rewards/accuracies": 0.
-"rewards/chosen": -2.
-"rewards/margins": 2.
-"rewards/rejected": -5.
+"logits/chosen": 117.1094970703125,
+"logits/rejected": 117.060302734375,
+"logps/chosen": -427.23431396484375,
+"logps/rejected": -520.2066650390625,
+"loss": 0.1591,
+"rewards/accuracies": 0.949999988079071,
+"rewards/chosen": -2.2608113288879395,
+"rewards/margins": 2.9113571643829346,
+"rewards/rejected": -5.172169208526611,
 "step": 100
 },
 {
 "epoch": 1.8957345971563981,
-"eval_logits/chosen": 96.
-"eval_logits/rejected":
-"eval_logps/chosen": -
-"eval_logps/rejected": -
-"eval_loss": 0.
+"eval_logits/chosen": 96.76607513427734,
+"eval_logits/rejected": 91.05736541748047,
+"eval_logps/chosen": -422.7994689941406,
+"eval_logps/rejected": -453.052978515625,
+"eval_loss": 0.4691648781299591,
 "eval_rewards/accuracies": 0.7604166865348816,
-"eval_rewards/chosen": -
-"eval_rewards/margins": 1.
-"eval_rewards/rejected": -
-"eval_runtime":
-"eval_samples_per_second":
-"eval_steps_per_second": 0.
+"eval_rewards/chosen": -2.9809672832489014,
+"eval_rewards/margins": 1.6640973091125488,
+"eval_rewards/rejected": -4.645064353942871,
+"eval_runtime": 56.9732,
+"eval_samples_per_second": 13.164,
+"eval_steps_per_second": 0.421,
 "step": 100
 },
 {
 "epoch": 1.971563981042654,
 "step": 104,
 "total_flos": 0.0,
-"train_loss": 0.
-"train_runtime":
-"train_samples_per_second":
-"train_steps_per_second": 0.
+"train_loss": 0.3921648321243433,
+"train_runtime": 1190.3032,
+"train_samples_per_second": 11.342,
+"train_steps_per_second": 0.087
 }
 ],
 "logging_steps": 10,
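For reference, the rewards/* columns above are the implicit DPO rewards as TRL-style trainers log them: β times the policy-minus-reference log-probability of each response, with rewards/margins the chosen-minus-rejected gap and rewards/accuracies the fraction of pairs with a positive margin. A minimal sketch of that relationship; the β value is an assumption here (the actual hyperparameters are stored in training_args.bin):

```python
# Sketch of how DPO-style reward metrics relate to sequence log-probabilities.
# Inputs are torch tensors of summed per-sequence log-probs.
import torch.nn.functional as F

beta = 0.05  # assumed; the real value is recorded in training_args.bin

def dpo_metrics(policy_chosen_logps, policy_rejected_logps,
                ref_chosen_logps, ref_rejected_logps):
    """Implicit rewards, margin, sigmoid DPO loss, and pairwise accuracy."""
    rewards_chosen = beta * (policy_chosen_logps - ref_chosen_logps)
    rewards_rejected = beta * (policy_rejected_logps - ref_rejected_logps)
    margins = rewards_chosen - rewards_rejected
    loss = -F.logsigmoid(margins).mean()      # the "loss" column
    accuracy = (margins > 0).float().mean()   # the "rewards/accuracies" column
    return rewards_chosen, rewards_rejected, margins, loss, accuracy
```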
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:912545168b662aa28a59d2ea7ff1a88af69cd74e0b822da195076b8d4f0f07e5
 size 6264