chrlu committed on
Commit
03cad8d
·
verified ·
1 Parent(s): e3c600f

Model save

Browse files
README.md CHANGED
@@ -2,15 +2,9 @@
2
  license: other
3
  base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - dpo
8
  - generated_from_trainer
9
- - trl
10
- - dpo
11
- - generated_from_trainer
12
- datasets:
13
- - argilla/dpo-mix-7k
14
  model-index:
15
  - name: zephyr-7b-gemma-dpo
16
  results: []
@@ -21,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  # zephyr-7b-gemma-dpo
23
 
24
- This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on the argilla/dpo-mix-7k dataset.
25
  It achieves the following results on the evaluation set:
26
- - Loss: 0.4578
27
- - Rewards/chosen: -3.5474
28
- - Rewards/rejected: -5.2118
29
  - Rewards/accuracies: 0.7604
30
- - Rewards/margins: 1.6644
31
- - Logps/rejected: -466.3073
32
- - Logps/chosen: -434.6161
33
- - Logits/rejected: 90.1100
34
- - Logits/chosen: 96.0078
35
 
36
  ## Model description
37
 
@@ -68,7 +62,7 @@ The following hyperparameters were used during training:
68
 
69
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
70
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
71
- | 0.1582 | 1.8957 | 100 | 0.4427 | -3.5439 | -5.2417 | 0.7604 | 1.6978 | -466.9060 | -434.5467 | 90.1294 | 96.0169 |
72
 
73
 
74
  ### Framework versions
 
2
  license: other
3
  base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
4
  tags:
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
 
 
 
8
  model-index:
9
  - name: zephyr-7b-gemma-dpo
10
  results: []
 
15
 
16
  # zephyr-7b-gemma-dpo
17
 
18
+ This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.4692
21
+ - Rewards/chosen: -2.9810
22
+ - Rewards/rejected: -4.6451
23
  - Rewards/accuracies: 0.7604
24
+ - Rewards/margins: 1.6641
25
+ - Logps/rejected: -453.0530
26
+ - Logps/chosen: -422.7995
27
+ - Logits/rejected: 91.0574
28
+ - Logits/chosen: 96.7661
29
 
30
  ## Model description
31
 
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
+ | 0.1591 | 1.8957 | 100 | 0.4692 | -2.9810 | -4.6451 | 0.7604 | 1.6641 | -453.0530 | -422.7995 | 91.0574 | 96.7661 |
66
 
67
 
68
  ### Framework versions
all_results.json CHANGED
@@ -1,22 +1,9 @@
1
  {
2
  "epoch": 1.971563981042654,
3
- "eval_logits/chosen": 96.0078125,
4
- "eval_logits/rejected": 90.1099853515625,
5
- "eval_logps/chosen": -434.6161193847656,
6
- "eval_logps/rejected": -466.3072814941406,
7
- "eval_loss": 0.4578173756599426,
8
- "eval_rewards/accuracies": 0.7604166865348816,
9
- "eval_rewards/chosen": -3.5473945140838623,
10
- "eval_rewards/margins": 1.6644223928451538,
11
- "eval_rewards/rejected": -5.211816787719727,
12
- "eval_runtime": 124.9704,
13
- "eval_samples": 750,
14
- "eval_samples_per_second": 6.001,
15
- "eval_steps_per_second": 0.192,
16
  "total_flos": 0.0,
17
- "train_loss": 0.391697224516135,
18
- "train_runtime": 2305.723,
19
  "train_samples": 6750,
20
- "train_samples_per_second": 5.855,
21
- "train_steps_per_second": 0.045
22
  }
 
1
  {
2
  "epoch": 1.971563981042654,
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
+ "train_loss": 0.3921648321243433,
5
+ "train_runtime": 1190.3032,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 11.342,
8
+ "train_steps_per_second": 0.087
9
  }
config.json CHANGED
@@ -24,6 +24,6 @@
24
  "rope_theta": 10000.0,
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.40.1",
27
- "use_cache": true,
28
  "vocab_size": 256000
29
  }
 
24
  "rope_theta": 10000.0,
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.40.1",
27
+ "use_cache": false,
28
  "vocab_size": 256000
29
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57be32a3428e1f2d925d42c93da364a2ff0b1cec6450fae99b663844b4353911
3
  size 4995496656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c5a7396241c9049d5aa615a0081a16f922ac27a595fad308da7be35b297a132
3
  size 4995496656
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b2b463e3960df5c2d6061c8298bfef88abf4cbb236fcf2ef79e7f8a296bb3d
3
  size 4982953168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd5dd371acf66dbd26a13036c2b42efee4bf540edd178d9cb3a225eccd87d21c
3
  size 4982953168
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e567f4df2dbadafbf8c26a32627496c7fbd6db12d83a3346773b6b9fac86a995
3
  size 4982953200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836f574fd62d28f4d282b81d3712708535d3eadc45ddd0509c932611129327c5
3
  size 4982953200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af13efaf67010f78192a6b8de5992c22ee84ebcdddcefd83cd8ad305d497ecc8
3
  size 2113988336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85fb13399e2932fccc4212b3907e81ae00bcdf3dbbb785a234bceaa501d29091
3
  size 2113988336
runs/Apr27_13-04-12_gcp002/events.out.tfevents.1714225073.gcp002.2844214.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52fbc293ec4d235cf3059606bc8b85f87a5b0758b3840ab0f43bccf065f0dd9c
3
+ size 13440
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.971563981042654,
3
  "total_flos": 0.0,
4
- "train_loss": 0.391697224516135,
5
- "train_runtime": 2305.723,
6
  "train_samples": 6750,
7
- "train_samples_per_second": 5.855,
8
- "train_steps_per_second": 0.045
9
  }
 
1
  {
2
  "epoch": 1.971563981042654,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.3921648321243433,
5
+ "train_runtime": 1190.3032,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 11.342,
8
+ "train_steps_per_second": 0.087
9
  }
trainer_state.json CHANGED
@@ -10,12 +10,12 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.018957345971563982,
13
- "grad_norm": 132.14458710760462,
14
  "learning_rate": 4.545454545454545e-08,
15
- "logits/chosen": 117.53560638427734,
16
- "logits/rejected": 126.8960952758789,
17
- "logps/chosen": -335.40118408203125,
18
- "logps/rejected": -439.16552734375,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
@@ -25,178 +25,178 @@
25
  },
26
  {
27
  "epoch": 0.1895734597156398,
28
- "grad_norm": 129.17456289198574,
29
  "learning_rate": 4.545454545454545e-07,
30
- "logits/chosen": 135.00433349609375,
31
- "logits/rejected": 138.3448028564453,
32
- "logps/chosen": -395.86181640625,
33
- "logps/rejected": -439.7121887207031,
34
- "loss": 0.7118,
35
- "rewards/accuracies": 0.4097222089767456,
36
- "rewards/chosen": 0.006736809387803078,
37
- "rewards/margins": 0.02597012370824814,
38
- "rewards/rejected": -0.019233308732509613,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.3791469194312796,
43
- "grad_norm": 126.56766201489141,
44
  "learning_rate": 4.885348141000122e-07,
45
- "logits/chosen": 121.39383697509766,
46
- "logits/rejected": 125.12164306640625,
47
- "logps/chosen": -371.44940185546875,
48
- "logps/rejected": -424.017822265625,
49
- "loss": 0.6352,
50
- "rewards/accuracies": 0.612500011920929,
51
- "rewards/chosen": 0.0481288805603981,
52
- "rewards/margins": 0.24945171177387238,
53
- "rewards/rejected": -0.20132283866405487,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.5687203791469194,
58
- "grad_norm": 113.84442623510158,
59
  "learning_rate": 4.5025027361734613e-07,
60
- "logits/chosen": 142.023681640625,
61
- "logits/rejected": 135.55198669433594,
62
- "logps/chosen": -417.58203125,
63
- "logps/rejected": -462.6326599121094,
64
- "loss": 0.5761,
65
- "rewards/accuracies": 0.706250011920929,
66
- "rewards/chosen": -1.255840539932251,
67
- "rewards/margins": 0.8756818771362305,
68
- "rewards/rejected": -2.1315224170684814,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.7582938388625592,
73
- "grad_norm": 108.20955894474618,
74
  "learning_rate": 3.893311157806091e-07,
75
- "logits/chosen": 124.3506088256836,
76
- "logits/rejected": 112.85099792480469,
77
- "logps/chosen": -398.3126220703125,
78
- "logps/rejected": -424.8102111816406,
79
- "loss": 0.5433,
80
- "rewards/accuracies": 0.7749999761581421,
81
- "rewards/chosen": -2.205923080444336,
82
- "rewards/margins": 1.1408246755599976,
83
- "rewards/rejected": -3.346747636795044,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.9478672985781991,
88
- "grad_norm": 126.72490253404305,
89
  "learning_rate": 3.126631330646801e-07,
90
- "logits/chosen": 139.5634002685547,
91
- "logits/rejected": 143.65257263183594,
92
- "logps/chosen": -460.2559509277344,
93
- "logps/rejected": -542.6179809570312,
94
- "loss": 0.4938,
95
- "rewards/accuracies": 0.7437499761581421,
96
- "rewards/chosen": -2.055608034133911,
97
- "rewards/margins": 1.258972406387329,
98
- "rewards/rejected": -3.3145804405212402,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 1.1374407582938388,
103
- "grad_norm": 61.8785357181442,
104
  "learning_rate": 2.2891223348923882e-07,
105
- "logits/chosen": 131.92459106445312,
106
- "logits/rejected": 135.47061157226562,
107
- "logps/chosen": -446.0873107910156,
108
- "logps/rejected": -530.6946411132812,
109
- "loss": 0.3112,
110
- "rewards/accuracies": 0.8687499761581421,
111
- "rewards/chosen": -2.4040818214416504,
112
- "rewards/margins": 2.312370777130127,
113
- "rewards/rejected": -4.716452121734619,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 1.3270142180094786,
118
- "grad_norm": 41.57352214721893,
119
  "learning_rate": 1.4754491880085317e-07,
120
- "logits/chosen": 124.61039733886719,
121
- "logits/rejected": 126.63330078125,
122
- "logps/chosen": -418.581787109375,
123
- "logps/rejected": -522.1246337890625,
124
- "loss": 0.1985,
125
  "rewards/accuracies": 0.9312499761581421,
126
- "rewards/chosen": -2.5718960762023926,
127
- "rewards/margins": 2.858640193939209,
128
- "rewards/rejected": -5.43053674697876,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 1.5165876777251186,
133
- "grad_norm": 45.00944046517108,
134
  "learning_rate": 7.775827023107834e-08,
135
- "logits/chosen": 110.86280822753906,
136
- "logits/rejected": 127.59481048583984,
137
- "logps/chosen": -417.392578125,
138
- "logps/rejected": -536.7481689453125,
139
- "loss": 0.1713,
140
  "rewards/accuracies": 0.9624999761581421,
141
- "rewards/chosen": -3.1870760917663574,
142
- "rewards/margins": 2.988443374633789,
143
- "rewards/rejected": -6.175518989562988,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 1.7061611374407581,
148
- "grad_norm": 41.01677369762327,
149
  "learning_rate": 2.7440387297912122e-08,
150
- "logits/chosen": 110.12068176269531,
151
- "logits/rejected": 122.8779067993164,
152
- "logps/chosen": -448.1509704589844,
153
- "logps/rejected": -565.2049560546875,
154
- "loss": 0.1566,
155
- "rewards/accuracies": 0.9624999761581421,
156
- "rewards/chosen": -3.085996150970459,
157
- "rewards/margins": 3.2901604175567627,
158
- "rewards/rejected": -6.376156330108643,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 1.8957345971563981,
163
- "grad_norm": 39.730984591066736,
164
  "learning_rate": 2.27878296044029e-09,
165
- "logits/chosen": 116.5959701538086,
166
- "logits/rejected": 116.32414245605469,
167
- "logps/chosen": -436.4479064941406,
168
- "logps/rejected": -531.0074462890625,
169
- "loss": 0.1582,
170
- "rewards/accuracies": 0.9437500238418579,
171
- "rewards/chosen": -2.723642110824585,
172
- "rewards/margins": 2.9885311126708984,
173
- "rewards/rejected": -5.7121734619140625,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 1.8957345971563981,
178
- "eval_logits/chosen": 96.0169448852539,
179
- "eval_logits/rejected": 90.12943267822266,
180
- "eval_logps/chosen": -434.5467224121094,
181
- "eval_logps/rejected": -466.9059753417969,
182
- "eval_loss": 0.44273361563682556,
183
  "eval_rewards/accuracies": 0.7604166865348816,
184
- "eval_rewards/chosen": -3.543926954269409,
185
- "eval_rewards/margins": 1.6978217363357544,
186
- "eval_rewards/rejected": -5.241748809814453,
187
- "eval_runtime": 123.9778,
188
- "eval_samples_per_second": 6.049,
189
- "eval_steps_per_second": 0.194,
190
  "step": 100
191
  },
192
  {
193
  "epoch": 1.971563981042654,
194
  "step": 104,
195
  "total_flos": 0.0,
196
- "train_loss": 0.391697224516135,
197
- "train_runtime": 2305.723,
198
- "train_samples_per_second": 5.855,
199
- "train_steps_per_second": 0.045
200
  }
201
  ],
202
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.018957345971563982,
13
+ "grad_norm": 133.64647421295854,
14
  "learning_rate": 4.545454545454545e-08,
15
+ "logits/chosen": 117.4909439086914,
16
+ "logits/rejected": 126.8502426147461,
17
+ "logps/chosen": -338.3250732421875,
18
+ "logps/rejected": -438.210205078125,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
 
25
  },
26
  {
27
  "epoch": 0.1895734597156398,
28
+ "grad_norm": 137.17714765050428,
29
  "learning_rate": 4.545454545454545e-07,
30
+ "logits/chosen": 135.0139923095703,
31
+ "logits/rejected": 138.361328125,
32
+ "logps/chosen": -397.126220703125,
33
+ "logps/rejected": -439.42083740234375,
34
+ "loss": 0.7143,
35
+ "rewards/accuracies": 0.3888888955116272,
36
+ "rewards/chosen": -0.02168009988963604,
37
+ "rewards/margins": -0.04445798695087433,
38
+ "rewards/rejected": 0.02277788519859314,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.3791469194312796,
43
+ "grad_norm": 125.18497680199994,
44
  "learning_rate": 4.885348141000122e-07,
45
+ "logits/chosen": 121.53276062011719,
46
+ "logits/rejected": 125.26307678222656,
47
+ "logps/chosen": -370.91107177734375,
48
+ "logps/rejected": -423.38677978515625,
49
+ "loss": 0.6418,
50
+ "rewards/accuracies": 0.5874999761581421,
51
+ "rewards/chosen": 0.08632902055978775,
52
+ "rewards/margins": 0.28587669134140015,
53
+ "rewards/rejected": -0.199547678232193,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.5687203791469194,
58
+ "grad_norm": 115.98151525592598,
59
  "learning_rate": 4.5025027361734613e-07,
60
+ "logits/chosen": 142.67178344726562,
61
+ "logits/rejected": 136.16537475585938,
62
+ "logps/chosen": -415.01104736328125,
63
+ "logps/rejected": -460.3519592285156,
64
+ "loss": 0.5673,
65
+ "rewards/accuracies": 0.668749988079071,
66
+ "rewards/chosen": -1.1033741235733032,
67
+ "rewards/margins": 0.8893669843673706,
68
+ "rewards/rejected": -1.9927412271499634,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.7582938388625592,
73
+ "grad_norm": 112.04677171325864,
74
  "learning_rate": 3.893311157806091e-07,
75
+ "logits/chosen": 124.56459045410156,
76
+ "logits/rejected": 113.08979797363281,
77
+ "logps/chosen": -391.10174560546875,
78
+ "logps/rejected": -417.09051513671875,
79
+ "loss": 0.5586,
80
+ "rewards/accuracies": 0.71875,
81
+ "rewards/chosen": -1.8466203212738037,
82
+ "rewards/margins": 1.0624934434890747,
83
+ "rewards/rejected": -2.909113883972168,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.9478672985781991,
88
+ "grad_norm": 126.0267199667638,
89
  "learning_rate": 3.126631330646801e-07,
90
+ "logits/chosen": 138.824462890625,
91
+ "logits/rejected": 142.9259033203125,
92
+ "logps/chosen": -455.6646423339844,
93
+ "logps/rejected": -536.987548828125,
94
+ "loss": 0.4941,
95
+ "rewards/accuracies": 0.731249988079071,
96
+ "rewards/chosen": -1.8371152877807617,
97
+ "rewards/margins": 1.2058273553848267,
98
+ "rewards/rejected": -3.042942523956299,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 1.1374407582938388,
103
+ "grad_norm": 63.02753605606795,
104
  "learning_rate": 2.2891223348923882e-07,
105
+ "logits/chosen": 131.37802124023438,
106
+ "logits/rejected": 134.72222900390625,
107
+ "logps/chosen": -436.17047119140625,
108
+ "logps/rejected": -520.2355346679688,
109
+ "loss": 0.3078,
110
+ "rewards/accuracies": 0.918749988079071,
111
+ "rewards/chosen": -1.9126123189926147,
112
+ "rewards/margins": 2.285891056060791,
113
+ "rewards/rejected": -4.198503017425537,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 1.3270142180094786,
118
+ "grad_norm": 59.15589622996558,
119
  "learning_rate": 1.4754491880085317e-07,
120
+ "logits/chosen": 124.51689147949219,
121
+ "logits/rejected": 126.70524597167969,
122
+ "logps/chosen": -402.62066650390625,
123
+ "logps/rejected": -505.50006103515625,
124
+ "loss": 0.1932,
125
  "rewards/accuracies": 0.9312499761581421,
126
+ "rewards/chosen": -1.7549495697021484,
127
+ "rewards/margins": 2.8932533264160156,
128
+ "rewards/rejected": -4.648203372955322,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 1.5165876777251186,
133
+ "grad_norm": 44.24206971141979,
134
  "learning_rate": 7.775827023107834e-08,
135
+ "logits/chosen": 111.74947357177734,
136
+ "logits/rejected": 128.5332489013672,
137
+ "logps/chosen": -400.06146240234375,
138
+ "logps/rejected": -519.4473876953125,
139
+ "loss": 0.1687,
140
  "rewards/accuracies": 0.9624999761581421,
141
+ "rewards/chosen": -2.326984167098999,
142
+ "rewards/margins": 2.937407970428467,
143
+ "rewards/rejected": -5.264392375946045,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 1.7061611374407581,
148
+ "grad_norm": 50.07584592888485,
149
  "learning_rate": 2.7440387297912122e-08,
150
+ "logits/chosen": 110.84814453125,
151
+ "logits/rejected": 123.78230285644531,
152
+ "logps/chosen": -435.03265380859375,
153
+ "logps/rejected": -550.7723388671875,
154
+ "loss": 0.1579,
155
+ "rewards/accuracies": 0.9750000238418579,
156
+ "rewards/chosen": -2.400338649749756,
157
+ "rewards/margins": 3.238767623901367,
158
+ "rewards/rejected": -5.639105796813965,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 1.8957345971563981,
163
+ "grad_norm": 49.65552371508206,
164
  "learning_rate": 2.27878296044029e-09,
165
+ "logits/chosen": 117.1094970703125,
166
+ "logits/rejected": 117.060302734375,
167
+ "logps/chosen": -427.23431396484375,
168
+ "logps/rejected": -520.2066650390625,
169
+ "loss": 0.1591,
170
+ "rewards/accuracies": 0.949999988079071,
171
+ "rewards/chosen": -2.2608113288879395,
172
+ "rewards/margins": 2.9113571643829346,
173
+ "rewards/rejected": -5.172169208526611,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 1.8957345971563981,
178
+ "eval_logits/chosen": 96.76607513427734,
179
+ "eval_logits/rejected": 91.05736541748047,
180
+ "eval_logps/chosen": -422.7994689941406,
181
+ "eval_logps/rejected": -453.052978515625,
182
+ "eval_loss": 0.4691648781299591,
183
  "eval_rewards/accuracies": 0.7604166865348816,
184
+ "eval_rewards/chosen": -2.9809672832489014,
185
+ "eval_rewards/margins": 1.6640973091125488,
186
+ "eval_rewards/rejected": -4.645064353942871,
187
+ "eval_runtime": 56.9732,
188
+ "eval_samples_per_second": 13.164,
189
+ "eval_steps_per_second": 0.421,
190
  "step": 100
191
  },
192
  {
193
  "epoch": 1.971563981042654,
194
  "step": 104,
195
  "total_flos": 0.0,
196
+ "train_loss": 0.3921648321243433,
197
+ "train_runtime": 1190.3032,
198
+ "train_samples_per_second": 11.342,
199
+ "train_steps_per_second": 0.087
200
  }
201
  ],
202
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32d4314fb42077b412ca888358e0197fb75b2357e9032ea0da1cd595a5e7608b
3
  size 6264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912545168b662aa28a59d2ea7ff1a88af69cd74e0b822da195076b8d4f0f07e5
3
  size 6264