jbjeong91 committed on
Commit
b472727
1 Parent(s): d285517

Model save

Browse files
README.md ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: llama3.1
4
+ base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
5
+ tags:
6
+ - trl
7
+ - cpo
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: llama3.1-cpo_j-full-0913
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # llama3.1-cpo_j-full-0913
18
+
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 1.4447
22
+ - Rewards/chosen: -16.0215
23
+ - Rewards/rejected: -16.8041
24
+ - Rewards/accuracies: 0.6261
25
+ - Rewards/margins: 0.7826
26
+ - Logps/rejected: -168.0413
27
+ - Logps/chosen: -160.2150
28
+ - Logits/rejected: -0.3389
29
+ - Logits/chosen: -0.3606
30
+ - Nll Loss: 0.2798
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 1e-06
50
+ - train_batch_size: 4
51
+ - eval_batch_size: 4
52
+ - seed: 42
53
+ - distributed_type: multi-GPU
54
+ - num_devices: 4
55
+ - gradient_accumulation_steps: 8
56
+ - total_train_batch_size: 128
57
+ - total_eval_batch_size: 16
58
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
59
+ - lr_scheduler_type: linear
60
+ - lr_scheduler_warmup_ratio: 0.1
61
+ - num_epochs: 1
62
+
63
+ ### Training results
64
+
65
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss |
66
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|
67
+ | 1.7833 | 0.2311 | 100 | 1.6389 | -15.3939 | -15.7792 | 0.5783 | 0.3853 | -157.7921 | -153.9390 | -0.3065 | -0.3333 | 0.2678 |
68
+ | 1.5321 | 0.4623 | 200 | 1.5242 | -15.8988 | -16.5121 | 0.5978 | 0.6132 | -165.1206 | -158.9884 | -0.4244 | -0.4423 | 0.2764 |
69
+ | 1.4722 | 0.6934 | 300 | 1.4633 | -16.0803 | -16.8141 | 0.6217 | 0.7338 | -168.1411 | -160.8031 | -0.3641 | -0.3856 | 0.2790 |
70
+ | 1.4589 | 0.9246 | 400 | 1.4447 | -16.0215 | -16.8041 | 0.6261 | 0.7826 | -168.0413 | -160.2150 | -0.3389 | -0.3606 | 0.2798 |
71
+
72
+
73
+ ### Framework versions
74
+
75
+ - Transformers 4.44.2
76
+ - Pytorch 2.3.1
77
+ - Datasets 2.21.0
78
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9985553308292401,
3
+ "total_flos": 0.0,
4
+ "train_loss": 1.6635627029118714,
5
+ "train_runtime": 10783.4889,
6
+ "train_samples": 55376,
7
+ "train_samples_per_second": 5.135,
8
+ "train_steps_per_second": 0.04
9
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "4.44.2"
12
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9985553308292401,
3
+ "total_flos": 0.0,
4
+ "train_loss": 1.6635627029118714,
5
+ "train_runtime": 10783.4889,
6
+ "train_samples": 55376,
7
+ "train_samples_per_second": 5.135,
8
+ "train_steps_per_second": 0.04
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,798 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9985553308292401,
5
+ "eval_steps": 100,
6
+ "global_step": 432,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.023114706732158336,
13
+ "grad_norm": 67.55843353271484,
14
+ "learning_rate": 2.2727272727272726e-07,
15
+ "logits/chosen": -0.33515310287475586,
16
+ "logits/rejected": -0.31493520736694336,
17
+ "logps/chosen": -269.29345703125,
18
+ "logps/rejected": -267.6085205078125,
19
+ "loss": 2.9222,
20
+ "nll_loss": 1.0535928010940552,
21
+ "rewards/accuracies": 0.4906249940395355,
22
+ "rewards/chosen": -26.929346084594727,
23
+ "rewards/margins": -0.16849184036254883,
24
+ "rewards/rejected": -26.760854721069336,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 0.04622941346431667,
29
+ "grad_norm": 57.353782653808594,
30
+ "learning_rate": 4.545454545454545e-07,
31
+ "logits/chosen": -0.3512418866157532,
32
+ "logits/rejected": -0.3330627977848053,
33
+ "logps/chosen": -260.02117919921875,
34
+ "logps/rejected": -266.5718078613281,
35
+ "loss": 2.8619,
36
+ "nll_loss": 0.9778251647949219,
37
+ "rewards/accuracies": 0.565625011920929,
38
+ "rewards/chosen": -26.002117156982422,
39
+ "rewards/margins": 0.6550623178482056,
40
+ "rewards/rejected": -26.657180786132812,
41
+ "step": 20
42
+ },
43
+ {
44
+ "epoch": 0.06934412019647501,
45
+ "grad_norm": 57.872154235839844,
46
+ "learning_rate": 6.818181818181817e-07,
47
+ "logits/chosen": -0.38214609026908875,
48
+ "logits/rejected": -0.36934739351272583,
49
+ "logps/chosen": -243.16397094726562,
50
+ "logps/rejected": -246.39990234375,
51
+ "loss": 2.6585,
52
+ "nll_loss": 1.018157958984375,
53
+ "rewards/accuracies": 0.512499988079071,
54
+ "rewards/chosen": -24.316394805908203,
55
+ "rewards/margins": 0.3235955238342285,
56
+ "rewards/rejected": -24.639989852905273,
57
+ "step": 30
58
+ },
59
+ {
60
+ "epoch": 0.09245882692863334,
61
+ "grad_norm": 51.343589782714844,
62
+ "learning_rate": 9.09090909090909e-07,
63
+ "logits/chosen": -0.6919909119606018,
64
+ "logits/rejected": -0.6750722527503967,
65
+ "logps/chosen": -202.0379180908203,
66
+ "logps/rejected": -203.7409210205078,
67
+ "loss": 2.385,
68
+ "nll_loss": 0.866622805595398,
69
+ "rewards/accuracies": 0.5218750238418579,
70
+ "rewards/chosen": -20.203792572021484,
71
+ "rewards/margins": 0.17029908299446106,
72
+ "rewards/rejected": -20.37409019470215,
73
+ "step": 40
74
+ },
75
+ {
76
+ "epoch": 0.11557353366079168,
77
+ "grad_norm": 45.946136474609375,
78
+ "learning_rate": 9.845360824742267e-07,
79
+ "logits/chosen": -0.8110788464546204,
80
+ "logits/rejected": -0.7866124510765076,
81
+ "logps/chosen": -176.52212524414062,
82
+ "logps/rejected": -175.50418090820312,
83
+ "loss": 2.1668,
84
+ "nll_loss": 0.4587995409965515,
85
+ "rewards/accuracies": 0.5062500238418579,
86
+ "rewards/chosen": -17.65221405029297,
87
+ "rewards/margins": -0.10179616510868073,
88
+ "rewards/rejected": -17.550418853759766,
89
+ "step": 50
90
+ },
91
+ {
92
+ "epoch": 0.13868824039295002,
93
+ "grad_norm": 55.89133834838867,
94
+ "learning_rate": 9.587628865979382e-07,
95
+ "logits/chosen": -0.635937511920929,
96
+ "logits/rejected": -0.6386198997497559,
97
+ "logps/chosen": -158.75927734375,
98
+ "logps/rejected": -159.04640197753906,
99
+ "loss": 1.9369,
100
+ "nll_loss": 0.41199779510498047,
101
+ "rewards/accuracies": 0.528124988079071,
102
+ "rewards/chosen": -15.87592887878418,
103
+ "rewards/margins": 0.028712665662169456,
104
+ "rewards/rejected": -15.904638290405273,
105
+ "step": 60
106
+ },
107
+ {
108
+ "epoch": 0.16180294712510834,
109
+ "grad_norm": 51.20368957519531,
110
+ "learning_rate": 9.329896907216495e-07,
111
+ "logits/chosen": -0.46112680435180664,
112
+ "logits/rejected": -0.4352455139160156,
113
+ "logps/chosen": -154.1770782470703,
114
+ "logps/rejected": -156.22377014160156,
115
+ "loss": 1.8871,
116
+ "nll_loss": 0.3339909613132477,
117
+ "rewards/accuracies": 0.5218750238418579,
118
+ "rewards/chosen": -15.417707443237305,
119
+ "rewards/margins": 0.20467153191566467,
120
+ "rewards/rejected": -15.622377395629883,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.1849176538572667,
125
+ "grad_norm": 46.16290283203125,
126
+ "learning_rate": 9.072164948453608e-07,
127
+ "logits/chosen": -0.36660072207450867,
128
+ "logits/rejected": -0.3421391248703003,
129
+ "logps/chosen": -158.54318237304688,
130
+ "logps/rejected": -161.6597442626953,
131
+ "loss": 1.7235,
132
+ "nll_loss": 0.2865411043167114,
133
+ "rewards/accuracies": 0.5093749761581421,
134
+ "rewards/chosen": -15.85431957244873,
135
+ "rewards/margins": 0.311655193567276,
136
+ "rewards/rejected": -16.165973663330078,
137
+ "step": 80
138
+ },
139
+ {
140
+ "epoch": 0.208032360589425,
141
+ "grad_norm": 49.6414680480957,
142
+ "learning_rate": 8.814432989690721e-07,
143
+ "logits/chosen": -0.3728088140487671,
144
+ "logits/rejected": -0.3610958456993103,
145
+ "logps/chosen": -153.40731811523438,
146
+ "logps/rejected": -161.5341796875,
147
+ "loss": 1.6191,
148
+ "nll_loss": 0.255443274974823,
149
+ "rewards/accuracies": 0.6000000238418579,
150
+ "rewards/chosen": -15.340731620788574,
151
+ "rewards/margins": 0.812686562538147,
152
+ "rewards/rejected": -16.153419494628906,
153
+ "step": 90
154
+ },
155
+ {
156
+ "epoch": 0.23114706732158335,
157
+ "grad_norm": 45.195945739746094,
158
+ "learning_rate": 8.556701030927834e-07,
159
+ "logits/chosen": -0.34290799498558044,
160
+ "logits/rejected": -0.3276888430118561,
161
+ "logps/chosen": -156.65350341796875,
162
+ "logps/rejected": -159.73312377929688,
163
+ "loss": 1.7833,
164
+ "nll_loss": 0.2992493510246277,
165
+ "rewards/accuracies": 0.546875,
166
+ "rewards/chosen": -15.665351867675781,
167
+ "rewards/margins": 0.30796217918395996,
168
+ "rewards/rejected": -15.973315238952637,
169
+ "step": 100
170
+ },
171
+ {
172
+ "epoch": 0.23114706732158335,
173
+ "eval_logits/chosen": -0.3332868814468384,
174
+ "eval_logits/rejected": -0.30650395154953003,
175
+ "eval_logps/chosen": -153.93896484375,
176
+ "eval_logps/rejected": -157.79205322265625,
177
+ "eval_loss": 1.6389189958572388,
178
+ "eval_nll_loss": 0.26784786581993103,
179
+ "eval_rewards/accuracies": 0.5782608985900879,
180
+ "eval_rewards/chosen": -15.393896102905273,
181
+ "eval_rewards/margins": 0.38530847430229187,
182
+ "eval_rewards/rejected": -15.779205322265625,
183
+ "eval_runtime": 77.324,
184
+ "eval_samples_per_second": 23.615,
185
+ "eval_steps_per_second": 1.487,
186
+ "step": 100
187
+ },
188
+ {
189
+ "epoch": 0.2542617740537417,
190
+ "grad_norm": 46.2556037902832,
191
+ "learning_rate": 8.298969072164948e-07,
192
+ "logits/chosen": -0.28695493936538696,
193
+ "logits/rejected": -0.2580588459968567,
194
+ "logps/chosen": -150.8438262939453,
195
+ "logps/rejected": -155.1436767578125,
196
+ "loss": 1.5662,
197
+ "nll_loss": 0.2598631978034973,
198
+ "rewards/accuracies": 0.5625,
199
+ "rewards/chosen": -15.084381103515625,
200
+ "rewards/margins": 0.4299860894680023,
201
+ "rewards/rejected": -15.514368057250977,
202
+ "step": 110
203
+ },
204
+ {
205
+ "epoch": 0.27737648078590005,
206
+ "grad_norm": 44.54212188720703,
207
+ "learning_rate": 8.041237113402062e-07,
208
+ "logits/chosen": -0.3080243170261383,
209
+ "logits/rejected": -0.2822437286376953,
210
+ "logps/chosen": -157.50094604492188,
211
+ "logps/rejected": -159.069580078125,
212
+ "loss": 1.6159,
213
+ "nll_loss": 0.30583077669143677,
214
+ "rewards/accuracies": 0.5406249761581421,
215
+ "rewards/chosen": -15.750096321105957,
216
+ "rewards/margins": 0.1568617820739746,
217
+ "rewards/rejected": -15.906957626342773,
218
+ "step": 120
219
+ },
220
+ {
221
+ "epoch": 0.30049118751805837,
222
+ "grad_norm": 55.04152297973633,
223
+ "learning_rate": 7.783505154639175e-07,
224
+ "logits/chosen": -0.30607444047927856,
225
+ "logits/rejected": -0.2829833924770355,
226
+ "logps/chosen": -157.97000122070312,
227
+ "logps/rejected": -166.240478515625,
228
+ "loss": 1.6775,
229
+ "nll_loss": 0.2982478737831116,
230
+ "rewards/accuracies": 0.596875011920929,
231
+ "rewards/chosen": -15.79699993133545,
232
+ "rewards/margins": 0.8270493745803833,
233
+ "rewards/rejected": -16.624048233032227,
234
+ "step": 130
235
+ },
236
+ {
237
+ "epoch": 0.3236058942502167,
238
+ "grad_norm": 68.56768035888672,
239
+ "learning_rate": 7.525773195876288e-07,
240
+ "logits/chosen": -0.38547760248184204,
241
+ "logits/rejected": -0.3799718916416168,
242
+ "logps/chosen": -148.39646911621094,
243
+ "logps/rejected": -153.57923889160156,
244
+ "loss": 1.6639,
245
+ "nll_loss": 0.2873283624649048,
246
+ "rewards/accuracies": 0.581250011920929,
247
+ "rewards/chosen": -14.83964729309082,
248
+ "rewards/margins": 0.5182766318321228,
249
+ "rewards/rejected": -15.35792350769043,
250
+ "step": 140
251
+ },
252
+ {
253
+ "epoch": 0.34672060098237506,
254
+ "grad_norm": 61.17930603027344,
255
+ "learning_rate": 7.268041237113402e-07,
256
+ "logits/chosen": -0.4227396845817566,
257
+ "logits/rejected": -0.41468995809555054,
258
+ "logps/chosen": -147.5921173095703,
259
+ "logps/rejected": -153.60935974121094,
260
+ "loss": 1.7192,
261
+ "nll_loss": 0.28114941716194153,
262
+ "rewards/accuracies": 0.609375,
263
+ "rewards/chosen": -14.759210586547852,
264
+ "rewards/margins": 0.6017246246337891,
265
+ "rewards/rejected": -15.360937118530273,
266
+ "step": 150
267
+ },
268
+ {
269
+ "epoch": 0.3698353077145334,
270
+ "grad_norm": 41.435455322265625,
271
+ "learning_rate": 7.010309278350515e-07,
272
+ "logits/chosen": -0.45233017206192017,
273
+ "logits/rejected": -0.4274457097053528,
274
+ "logps/chosen": -162.49929809570312,
275
+ "logps/rejected": -166.28240966796875,
276
+ "loss": 1.5534,
277
+ "nll_loss": 0.28693827986717224,
278
+ "rewards/accuracies": 0.5625,
279
+ "rewards/chosen": -16.249929428100586,
280
+ "rewards/margins": 0.3783140480518341,
281
+ "rewards/rejected": -16.62824249267578,
282
+ "step": 160
283
+ },
284
+ {
285
+ "epoch": 0.3929500144466917,
286
+ "grad_norm": 47.89544677734375,
287
+ "learning_rate": 6.752577319587629e-07,
288
+ "logits/chosen": -0.36802831292152405,
289
+ "logits/rejected": -0.35963720083236694,
290
+ "logps/chosen": -158.8728790283203,
291
+ "logps/rejected": -166.11460876464844,
292
+ "loss": 1.4425,
293
+ "nll_loss": 0.2846711277961731,
294
+ "rewards/accuracies": 0.578125,
295
+ "rewards/chosen": -15.887288093566895,
296
+ "rewards/margins": 0.7241734862327576,
297
+ "rewards/rejected": -16.611461639404297,
298
+ "step": 170
299
+ },
300
+ {
301
+ "epoch": 0.41606472117885,
302
+ "grad_norm": 47.56120681762695,
303
+ "learning_rate": 6.494845360824742e-07,
304
+ "logits/chosen": -0.4111746847629547,
305
+ "logits/rejected": -0.4110477566719055,
306
+ "logps/chosen": -152.79598999023438,
307
+ "logps/rejected": -159.20770263671875,
308
+ "loss": 1.4742,
309
+ "nll_loss": 0.2701548635959625,
310
+ "rewards/accuracies": 0.5531250238418579,
311
+ "rewards/chosen": -15.279600143432617,
312
+ "rewards/margins": 0.6411706805229187,
313
+ "rewards/rejected": -15.920770645141602,
314
+ "step": 180
315
+ },
316
+ {
317
+ "epoch": 0.4391794279110084,
318
+ "grad_norm": 51.05569076538086,
319
+ "learning_rate": 6.237113402061855e-07,
320
+ "logits/chosen": -0.4827125668525696,
321
+ "logits/rejected": -0.4709135591983795,
322
+ "logps/chosen": -161.96566772460938,
323
+ "logps/rejected": -168.30032348632812,
324
+ "loss": 1.546,
325
+ "nll_loss": 0.29454725980758667,
326
+ "rewards/accuracies": 0.5843750238418579,
327
+ "rewards/chosen": -16.19656753540039,
328
+ "rewards/margins": 0.63346266746521,
329
+ "rewards/rejected": -16.83003044128418,
330
+ "step": 190
331
+ },
332
+ {
333
+ "epoch": 0.4622941346431667,
334
+ "grad_norm": 44.95571517944336,
335
+ "learning_rate": 5.979381443298969e-07,
336
+ "logits/chosen": -0.426901638507843,
337
+ "logits/rejected": -0.42319783568382263,
338
+ "logps/chosen": -160.36892700195312,
339
+ "logps/rejected": -166.8142547607422,
340
+ "loss": 1.5321,
341
+ "nll_loss": 0.29501739144325256,
342
+ "rewards/accuracies": 0.6187499761581421,
343
+ "rewards/chosen": -16.03689193725586,
344
+ "rewards/margins": 0.644534707069397,
345
+ "rewards/rejected": -16.681427001953125,
346
+ "step": 200
347
+ },
348
+ {
349
+ "epoch": 0.4622941346431667,
350
+ "eval_logits/chosen": -0.4423229396343231,
351
+ "eval_logits/rejected": -0.42438310384750366,
352
+ "eval_logps/chosen": -158.98841857910156,
353
+ "eval_logps/rejected": -165.12060546875,
354
+ "eval_loss": 1.524242877960205,
355
+ "eval_nll_loss": 0.2763634920120239,
356
+ "eval_rewards/accuracies": 0.5978260636329651,
357
+ "eval_rewards/chosen": -15.898841857910156,
358
+ "eval_rewards/margins": 0.6132183074951172,
359
+ "eval_rewards/rejected": -16.512060165405273,
360
+ "eval_runtime": 77.1561,
361
+ "eval_samples_per_second": 23.666,
362
+ "eval_steps_per_second": 1.49,
363
+ "step": 200
364
+ },
365
+ {
366
+ "epoch": 0.48540884137532503,
367
+ "grad_norm": 52.92426300048828,
368
+ "learning_rate": 5.721649484536082e-07,
369
+ "logits/chosen": -0.45326417684555054,
370
+ "logits/rejected": -0.42941370606422424,
371
+ "logps/chosen": -156.1173553466797,
372
+ "logps/rejected": -158.58352661132812,
373
+ "loss": 1.5104,
374
+ "nll_loss": 0.28336626291275024,
375
+ "rewards/accuracies": 0.5531250238418579,
376
+ "rewards/chosen": -15.611738204956055,
377
+ "rewards/margins": 0.246616929769516,
378
+ "rewards/rejected": -15.858355522155762,
379
+ "step": 210
380
+ },
381
+ {
382
+ "epoch": 0.5085235481074833,
383
+ "grad_norm": 50.266258239746094,
384
+ "learning_rate": 5.463917525773195e-07,
385
+ "logits/chosen": -0.471625953912735,
386
+ "logits/rejected": -0.4460209906101227,
387
+ "logps/chosen": -168.0143585205078,
388
+ "logps/rejected": -173.90097045898438,
389
+ "loss": 1.569,
390
+ "nll_loss": 0.28898459672927856,
391
+ "rewards/accuracies": 0.6187499761581421,
392
+ "rewards/chosen": -16.801433563232422,
393
+ "rewards/margins": 0.5886625051498413,
394
+ "rewards/rejected": -17.390098571777344,
395
+ "step": 220
396
+ },
397
+ {
398
+ "epoch": 0.5316382548396418,
399
+ "grad_norm": 55.33750534057617,
400
+ "learning_rate": 5.20618556701031e-07,
401
+ "logits/chosen": -0.42029595375061035,
402
+ "logits/rejected": -0.4011649191379547,
403
+ "logps/chosen": -167.03567504882812,
404
+ "logps/rejected": -173.90760803222656,
405
+ "loss": 1.5782,
406
+ "nll_loss": 0.29872751235961914,
407
+ "rewards/accuracies": 0.6031249761581421,
408
+ "rewards/chosen": -16.703567504882812,
409
+ "rewards/margins": 0.687191903591156,
410
+ "rewards/rejected": -17.39076042175293,
411
+ "step": 230
412
+ },
413
+ {
414
+ "epoch": 0.5547529615718001,
415
+ "grad_norm": 48.64881896972656,
416
+ "learning_rate": 4.948453608247422e-07,
417
+ "logits/chosen": -0.37133297324180603,
418
+ "logits/rejected": -0.3616218566894531,
419
+ "logps/chosen": -166.56610107421875,
420
+ "logps/rejected": -171.88787841796875,
421
+ "loss": 1.4277,
422
+ "nll_loss": 0.2906336784362793,
423
+ "rewards/accuracies": 0.559374988079071,
424
+ "rewards/chosen": -16.6566104888916,
425
+ "rewards/margins": 0.532177746295929,
426
+ "rewards/rejected": -17.18878746032715,
427
+ "step": 240
428
+ },
429
+ {
430
+ "epoch": 0.5778676683039584,
431
+ "grad_norm": 53.73638916015625,
432
+ "learning_rate": 4.6907216494845357e-07,
433
+ "logits/chosen": -0.3499279022216797,
434
+ "logits/rejected": -0.3348368704319,
435
+ "logps/chosen": -161.66017150878906,
436
+ "logps/rejected": -165.3099822998047,
437
+ "loss": 1.5715,
438
+ "nll_loss": 0.24935810267925262,
439
+ "rewards/accuracies": 0.550000011920929,
440
+ "rewards/chosen": -16.166015625,
441
+ "rewards/margins": 0.3649832606315613,
442
+ "rewards/rejected": -16.5310001373291,
443
+ "step": 250
444
+ },
445
+ {
446
+ "epoch": 0.6009823750361167,
447
+ "grad_norm": 51.90835952758789,
448
+ "learning_rate": 4.432989690721649e-07,
449
+ "logits/chosen": -0.46265679597854614,
450
+ "logits/rejected": -0.4500146508216858,
451
+ "logps/chosen": -158.95870971679688,
452
+ "logps/rejected": -167.43258666992188,
453
+ "loss": 1.4266,
454
+ "nll_loss": 0.2810400724411011,
455
+ "rewards/accuracies": 0.6187499761581421,
456
+ "rewards/chosen": -15.895869255065918,
457
+ "rewards/margins": 0.8473905324935913,
458
+ "rewards/rejected": -16.743261337280273,
459
+ "step": 260
460
+ },
461
+ {
462
+ "epoch": 0.624097081768275,
463
+ "grad_norm": 43.483646392822266,
464
+ "learning_rate": 4.175257731958763e-07,
465
+ "logits/chosen": -0.4157690107822418,
466
+ "logits/rejected": -0.41007503867149353,
467
+ "logps/chosen": -158.64816284179688,
468
+ "logps/rejected": -165.8525848388672,
469
+ "loss": 1.4656,
470
+ "nll_loss": 0.2797052264213562,
471
+ "rewards/accuracies": 0.6156250238418579,
472
+ "rewards/chosen": -15.864816665649414,
473
+ "rewards/margins": 0.7204429507255554,
474
+ "rewards/rejected": -16.58526039123535,
475
+ "step": 270
476
+ },
477
+ {
478
+ "epoch": 0.6472117885004334,
479
+ "grad_norm": 55.48883819580078,
480
+ "learning_rate": 3.917525773195876e-07,
481
+ "logits/chosen": -0.34205523133277893,
482
+ "logits/rejected": -0.3176972568035126,
483
+ "logps/chosen": -161.7351531982422,
484
+ "logps/rejected": -171.16500854492188,
485
+ "loss": 1.4658,
486
+ "nll_loss": 0.29218602180480957,
487
+ "rewards/accuracies": 0.625,
488
+ "rewards/chosen": -16.173513412475586,
489
+ "rewards/margins": 0.9429864883422852,
490
+ "rewards/rejected": -17.116500854492188,
491
+ "step": 280
492
+ },
493
+ {
494
+ "epoch": 0.6703264952325917,
495
+ "grad_norm": 58.60624694824219,
496
+ "learning_rate": 3.659793814432989e-07,
497
+ "logits/chosen": -0.3818144202232361,
498
+ "logits/rejected": -0.35421329736709595,
499
+ "logps/chosen": -165.26620483398438,
500
+ "logps/rejected": -171.03085327148438,
501
+ "loss": 1.4217,
502
+ "nll_loss": 0.29271894693374634,
503
+ "rewards/accuracies": 0.6000000238418579,
504
+ "rewards/chosen": -16.526620864868164,
505
+ "rewards/margins": 0.5764636397361755,
506
+ "rewards/rejected": -17.103084564208984,
507
+ "step": 290
508
+ },
509
+ {
510
+ "epoch": 0.6934412019647501,
511
+ "grad_norm": 43.18596267700195,
512
+ "learning_rate": 3.402061855670103e-07,
513
+ "logits/chosen": -0.44146934151649475,
514
+ "logits/rejected": -0.4326046109199524,
515
+ "logps/chosen": -168.69503784179688,
516
+ "logps/rejected": -178.41549682617188,
517
+ "loss": 1.4722,
518
+ "nll_loss": 0.3029026389122009,
519
+ "rewards/accuracies": 0.6156250238418579,
520
+ "rewards/chosen": -16.869503021240234,
521
+ "rewards/margins": 0.9720472097396851,
522
+ "rewards/rejected": -17.841550827026367,
523
+ "step": 300
524
+ },
525
+ {
526
+ "epoch": 0.6934412019647501,
527
+ "eval_logits/chosen": -0.3856337070465088,
528
+ "eval_logits/rejected": -0.36413437128067017,
529
+ "eval_logps/chosen": -160.80313110351562,
530
+ "eval_logps/rejected": -168.14109802246094,
531
+ "eval_loss": 1.463273048400879,
532
+ "eval_nll_loss": 0.27904874086380005,
533
+ "eval_rewards/accuracies": 0.6217391490936279,
534
+ "eval_rewards/chosen": -16.080310821533203,
535
+ "eval_rewards/margins": 0.7337984442710876,
536
+ "eval_rewards/rejected": -16.814109802246094,
537
+ "eval_runtime": 76.8594,
538
+ "eval_samples_per_second": 23.758,
539
+ "eval_steps_per_second": 1.496,
540
+ "step": 300
541
+ },
542
+ {
543
+ "epoch": 0.7165559086969084,
544
+ "grad_norm": 48.37858200073242,
545
+ "learning_rate": 3.1443298969072163e-07,
546
+ "logits/chosen": -0.43834060430526733,
547
+ "logits/rejected": -0.4327595829963684,
548
+ "logps/chosen": -170.76321411132812,
549
+ "logps/rejected": -174.53939819335938,
550
+ "loss": 1.5353,
551
+ "nll_loss": 0.30162352323532104,
552
+ "rewards/accuracies": 0.559374988079071,
553
+ "rewards/chosen": -17.07632064819336,
554
+ "rewards/margins": 0.37761738896369934,
555
+ "rewards/rejected": -17.45393943786621,
556
+ "step": 310
557
+ },
558
+ {
559
+ "epoch": 0.7396706154290668,
560
+ "grad_norm": 45.166900634765625,
561
+ "learning_rate": 2.8865979381443296e-07,
562
+ "logits/chosen": -0.4189019799232483,
563
+ "logits/rejected": -0.41374340653419495,
564
+ "logps/chosen": -159.76388549804688,
565
+ "logps/rejected": -168.59652709960938,
566
+ "loss": 1.521,
567
+ "nll_loss": 0.28271549940109253,
568
+ "rewards/accuracies": 0.628125011920929,
569
+ "rewards/chosen": -15.976388931274414,
570
+ "rewards/margins": 0.8832640647888184,
571
+ "rewards/rejected": -16.85965347290039,
572
+ "step": 320
573
+ },
574
+ {
575
+ "epoch": 0.7627853221612251,
576
+ "grad_norm": 46.85552215576172,
577
+ "learning_rate": 2.6288659793814435e-07,
578
+ "logits/chosen": -0.4116067886352539,
579
+ "logits/rejected": -0.4161633551120758,
580
+ "logps/chosen": -159.32391357421875,
581
+ "logps/rejected": -166.09951782226562,
582
+ "loss": 1.4975,
583
+ "nll_loss": 0.299803227186203,
584
+ "rewards/accuracies": 0.628125011920929,
585
+ "rewards/chosen": -15.932391166687012,
586
+ "rewards/margins": 0.6775625944137573,
587
+ "rewards/rejected": -16.609954833984375,
588
+ "step": 330
589
+ },
590
+ {
591
+ "epoch": 0.7859000288933834,
592
+ "grad_norm": 47.712646484375,
593
+ "learning_rate": 2.3711340206185566e-07,
594
+ "logits/chosen": -0.4130411148071289,
595
+ "logits/rejected": -0.40607109665870667,
596
+ "logps/chosen": -162.58676147460938,
597
+ "logps/rejected": -167.87673950195312,
598
+ "loss": 1.3896,
599
+ "nll_loss": 0.27490609884262085,
600
+ "rewards/accuracies": 0.581250011920929,
601
+ "rewards/chosen": -16.258676528930664,
602
+ "rewards/margins": 0.5289959907531738,
603
+ "rewards/rejected": -16.78767204284668,
604
+ "step": 340
605
+ },
606
+ {
607
+ "epoch": 0.8090147356255417,
608
+ "grad_norm": 45.12944412231445,
609
+ "learning_rate": 2.11340206185567e-07,
610
+ "logits/chosen": -0.40707340836524963,
611
+ "logits/rejected": -0.4076949656009674,
612
+ "logps/chosen": -164.3544921875,
613
+ "logps/rejected": -173.22579956054688,
614
+ "loss": 1.4861,
615
+ "nll_loss": 0.279820054769516,
616
+ "rewards/accuracies": 0.6156250238418579,
617
+ "rewards/chosen": -16.435449600219727,
618
+ "rewards/margins": 0.8871291279792786,
619
+ "rewards/rejected": -17.32257843017578,
620
+ "step": 350
621
+ },
622
+ {
623
+ "epoch": 0.8321294423577,
624
+ "grad_norm": 55.76411437988281,
625
+ "learning_rate": 1.8556701030927835e-07,
626
+ "logits/chosen": -0.41376179456710815,
627
+ "logits/rejected": -0.4020335078239441,
628
+ "logps/chosen": -167.02381896972656,
629
+ "logps/rejected": -172.5863800048828,
630
+ "loss": 1.5281,
631
+ "nll_loss": 0.2744566798210144,
632
+ "rewards/accuracies": 0.596875011920929,
633
+ "rewards/chosen": -16.702383041381836,
634
+ "rewards/margins": 0.5562567114830017,
635
+ "rewards/rejected": -17.258638381958008,
636
+ "step": 360
637
+ },
638
+ {
639
+ "epoch": 0.8552441490898585,
640
+ "grad_norm": 48.47395706176758,
641
+ "learning_rate": 1.5979381443298966e-07,
642
+ "logits/chosen": -0.4272083342075348,
643
+ "logits/rejected": -0.4035620093345642,
644
+ "logps/chosen": -161.60487365722656,
645
+ "logps/rejected": -171.45762634277344,
646
+ "loss": 1.3967,
647
+ "nll_loss": 0.2558661103248596,
648
+ "rewards/accuracies": 0.65625,
649
+ "rewards/chosen": -16.160486221313477,
650
+ "rewards/margins": 0.9852760434150696,
651
+ "rewards/rejected": -17.145763397216797,
652
+ "step": 370
653
+ },
654
+ {
655
+ "epoch": 0.8783588558220168,
656
+ "grad_norm": 50.784507751464844,
657
+ "learning_rate": 1.3402061855670102e-07,
658
+ "logits/chosen": -0.4153415262699127,
659
+ "logits/rejected": -0.403484582901001,
660
+ "logps/chosen": -159.54061889648438,
661
+ "logps/rejected": -166.57180786132812,
662
+ "loss": 1.414,
663
+ "nll_loss": 0.2998126149177551,
664
+ "rewards/accuracies": 0.612500011920929,
665
+ "rewards/chosen": -15.954061508178711,
666
+ "rewards/margins": 0.7031179666519165,
667
+ "rewards/rejected": -16.657180786132812,
668
+ "step": 380
669
+ },
670
+ {
671
+ "epoch": 0.9014735625541751,
672
+ "grad_norm": 55.96210861206055,
673
+ "learning_rate": 1.0824742268041237e-07,
674
+ "logits/chosen": -0.4493354856967926,
675
+ "logits/rejected": -0.43461060523986816,
676
+ "logps/chosen": -163.90878295898438,
677
+ "logps/rejected": -174.5413055419922,
678
+ "loss": 1.4429,
679
+ "nll_loss": 0.27769935131073,
680
+ "rewards/accuracies": 0.6312500238418579,
681
+ "rewards/chosen": -16.39087677001953,
682
+ "rewards/margins": 1.063251256942749,
683
+ "rewards/rejected": -17.45412826538086,
684
+ "step": 390
685
+ },
686
+ {
687
+ "epoch": 0.9245882692863334,
688
+ "grad_norm": 49.90566635131836,
689
+ "learning_rate": 8.24742268041237e-08,
690
+ "logits/chosen": -0.3982897698879242,
691
+ "logits/rejected": -0.37326449155807495,
692
+ "logps/chosen": -166.83291625976562,
693
+ "logps/rejected": -172.27056884765625,
694
+ "loss": 1.4589,
695
+ "nll_loss": 0.3044116795063019,
696
+ "rewards/accuracies": 0.578125,
697
+ "rewards/chosen": -16.683292388916016,
698
+ "rewards/margins": 0.5437662601470947,
699
+ "rewards/rejected": -17.227060317993164,
700
+ "step": 400
701
+ },
702
+ {
703
+ "epoch": 0.9245882692863334,
704
+ "eval_logits/chosen": -0.360569566488266,
705
+ "eval_logits/rejected": -0.33890673518180847,
706
+ "eval_logps/chosen": -160.21502685546875,
707
+ "eval_logps/rejected": -168.04132080078125,
708
+ "eval_loss": 1.4446684122085571,
709
+ "eval_nll_loss": 0.2798323929309845,
710
+ "eval_rewards/accuracies": 0.626086950302124,
711
+ "eval_rewards/chosen": -16.021503448486328,
712
+ "eval_rewards/margins": 0.7826284766197205,
713
+ "eval_rewards/rejected": -16.80413246154785,
714
+ "eval_runtime": 76.9997,
715
+ "eval_samples_per_second": 23.714,
716
+ "eval_steps_per_second": 1.494,
717
+ "step": 400
718
+ },
719
+ {
720
+ "epoch": 0.9477029760184917,
721
+ "grad_norm": 50.10642623901367,
722
+ "learning_rate": 5.670103092783505e-08,
723
+ "logits/chosen": -0.3168153762817383,
724
+ "logits/rejected": -0.3060024678707123,
725
+ "logps/chosen": -152.92799377441406,
726
+ "logps/rejected": -162.26162719726562,
727
+ "loss": 1.4003,
728
+ "nll_loss": 0.2777474522590637,
729
+ "rewards/accuracies": 0.6187499761581421,
730
+ "rewards/chosen": -15.29279899597168,
731
+ "rewards/margins": 0.9333623051643372,
732
+ "rewards/rejected": -16.22616195678711,
733
+ "step": 410
734
+ },
735
+ {
736
+ "epoch": 0.9708176827506501,
737
+ "grad_norm": 46.16789245605469,
738
+ "learning_rate": 3.092783505154639e-08,
739
+ "logits/chosen": -0.3549926280975342,
740
+ "logits/rejected": -0.33413809537887573,
741
+ "logps/chosen": -155.63693237304688,
742
+ "logps/rejected": -161.7698211669922,
743
+ "loss": 1.4104,
744
+ "nll_loss": 0.28400248289108276,
745
+ "rewards/accuracies": 0.596875011920929,
746
+ "rewards/chosen": -15.563693046569824,
747
+ "rewards/margins": 0.6132909059524536,
748
+ "rewards/rejected": -16.176982879638672,
749
+ "step": 420
750
+ },
751
+ {
752
+ "epoch": 0.9939323894828085,
753
+ "grad_norm": 47.918479919433594,
754
+ "learning_rate": 5.154639175257731e-09,
755
+ "logits/chosen": -0.37286701798439026,
756
+ "logits/rejected": -0.38741737604141235,
757
+ "logps/chosen": -163.7307586669922,
758
+ "logps/rejected": -171.1845245361328,
759
+ "loss": 1.4847,
760
+ "nll_loss": 0.26778078079223633,
761
+ "rewards/accuracies": 0.6000000238418579,
762
+ "rewards/chosen": -16.373075485229492,
763
+ "rewards/margins": 0.7453770041465759,
764
+ "rewards/rejected": -17.118452072143555,
765
+ "step": 430
766
+ },
767
+ {
768
+ "epoch": 0.9985553308292401,
769
+ "step": 432,
770
+ "total_flos": 0.0,
771
+ "train_loss": 1.6635627029118714,
772
+ "train_runtime": 10783.4889,
773
+ "train_samples_per_second": 5.135,
774
+ "train_steps_per_second": 0.04
775
+ }
776
+ ],
777
+ "logging_steps": 10,
778
+ "max_steps": 432,
779
+ "num_input_tokens_seen": 0,
780
+ "num_train_epochs": 1,
781
+ "save_steps": 100,
782
+ "stateful_callbacks": {
783
+ "TrainerControl": {
784
+ "args": {
785
+ "should_epoch_stop": false,
786
+ "should_evaluate": false,
787
+ "should_log": false,
788
+ "should_save": true,
789
+ "should_training_stop": true
790
+ },
791
+ "attributes": {}
792
+ }
793
+ },
794
+ "total_flos": 0.0,
795
+ "train_batch_size": 4,
796
+ "trial_name": null,
797
+ "trial_params": null
798
+ }