dq158 commited on
Commit
5233fd1
1 Parent(s): 8f614f8

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -204,11 +204,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
204
  ### Framework versions
205
 
206
 
207
- - PEFT 0.6.0
208
- ## Training procedure
209
-
210
-
211
- ### Framework versions
212
-
213
-
214
  - PEFT 0.6.0
 
204
  ### Framework versions
205
 
206
 
 
 
 
 
 
 
 
207
  - PEFT 0.6.0
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebcfdcdfa3bdc7a757e7a09d4af0930bb0ab6ae427582caca3e7e4491aaf78b8
3
  size 18915040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7a96565adb18def37a1340113d75649638f134820ae5a4e392f65280e9a5caa
3
  size 18915040
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7d2b1f4649fb7a0161e536a860fec72b1a52fca6f2468305dd94b096031f471
3
  size 2603258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6d4a935399a73989d68a6b900b12edf4ca9281c8ca4e998c60b9d245163b3c7
3
  size 2603258
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae28f9b2f5015b95ad2f650d54d1979a68d40e31cc70d2fd5c54ddf3ae4a6519
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f38cdf3aedf46cd440f3f4dbc850109a1d275c7ba23da0f6101854bd55819b97
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57695ed4dda5b134b6f2bba8cf8471ac59a4530c5d9c347e44749e6041039a0e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d2b68cfebeb5175d553b1427044f3f0df89ff55d6dafbbecb0ca12fed4d4dc5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,37 +1,319 @@
1
  {
2
- "best_metric": 2.240180015563965,
3
- "best_model_checkpoint": "dq158/pingusPongus/checkpoint-1581",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 1581,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.32,
13
- "learning_rate": 0.0005,
14
- "loss": 2.8358,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.63,
19
- "learning_rate": 0.0004943963563220183,
20
- "loss": 2.4569,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.95,
25
- "learning_rate": 0.0004778366318678313,
26
- "loss": 2.3821,
27
  "step": 1500
28
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  {
30
  "epoch": 1.0,
31
  "eval_bleu": 1.0,
32
  "eval_brevity_penalty": 1.0,
33
  "eval_length_ratio": 1.0,
34
- "eval_loss": 2.240180015563965,
35
  "eval_precisions": [
36
  1.0,
37
  1.0,
@@ -39,16 +321,16 @@
39
  1.0
40
  ],
41
  "eval_reference_length": 1439232,
42
- "eval_runtime": 880.2333,
43
- "eval_samples_per_second": 3.193,
44
- "eval_steps_per_second": 0.2,
45
  "eval_translation_length": 1439232,
46
- "step": 1581
47
  }
48
  ],
49
  "logging_steps": 500,
50
- "max_steps": 7905,
51
- "num_train_epochs": 5,
52
  "save_steps": 500,
53
  "total_flos": 2.1666322696686797e+17,
54
  "trial_name": null,
 
1
  {
2
+ "best_metric": 2.452415943145752,
3
+ "best_model_checkpoint": "dq158/pingusPongus/checkpoint-25291",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 25291,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02,
13
+ "learning_rate": 0.0004,
14
+ "loss": 3.2511,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.04,
19
+ "learning_rate": 0.0003999995708217828,
20
+ "loss": 2.8938,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 0.06,
25
+ "learning_rate": 0.00039999828328897294,
26
+ "loss": 2.7835,
27
  "step": 1500
28
  },
29
+ {
30
+ "epoch": 0.08,
31
+ "learning_rate": 0.00039999613740709635,
32
+ "loss": 2.7405,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.1,
37
+ "learning_rate": 0.00039999313318536277,
38
+ "loss": 2.6816,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.12,
43
+ "learning_rate": 0.00039998927063666543,
44
+ "loss": 2.7085,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.14,
49
+ "learning_rate": 0.00039998454977758173,
50
+ "loss": 2.7713,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.16,
55
+ "learning_rate": 0.0003999789706283725,
56
+ "loss": 2.6722,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.18,
61
+ "learning_rate": 0.0003999725332129823,
62
+ "loss": 2.6873,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.2,
67
+ "learning_rate": 0.000399965237559039,
68
+ "loss": 2.6403,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.22,
73
+ "learning_rate": 0.00039995708369785403,
74
+ "loss": 2.6328,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.24,
79
+ "learning_rate": 0.000399948071664422,
80
+ "loss": 2.6212,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.26,
85
+ "learning_rate": 0.0003999382014974206,
86
+ "loss": 2.581,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.28,
91
+ "learning_rate": 0.00039992747323921036,
92
+ "loss": 2.6699,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 0.3,
97
+ "learning_rate": 0.0003999158869358347,
98
+ "loss": 2.6095,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 0.32,
103
+ "learning_rate": 0.00039990344263701955,
104
+ "loss": 2.6429,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 0.34,
109
+ "learning_rate": 0.00039989014039617295,
110
+ "loss": 2.6528,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 0.36,
115
+ "learning_rate": 0.0003998759802703854,
116
+ "loss": 2.5395,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 0.38,
121
+ "learning_rate": 0.000399860962320429,
122
+ "loss": 2.6032,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 0.4,
127
+ "learning_rate": 0.00039984508661075754,
128
+ "loss": 2.6129,
129
+ "step": 10000
130
+ },
131
+ {
132
+ "epoch": 0.42,
133
+ "learning_rate": 0.0003998283532095061,
134
+ "loss": 2.584,
135
+ "step": 10500
136
+ },
137
+ {
138
+ "epoch": 0.43,
139
+ "learning_rate": 0.00039981076218849077,
140
+ "loss": 2.6327,
141
+ "step": 11000
142
+ },
143
+ {
144
+ "epoch": 0.45,
145
+ "learning_rate": 0.00039979231362320845,
146
+ "loss": 2.6264,
147
+ "step": 11500
148
+ },
149
+ {
150
+ "epoch": 0.47,
151
+ "learning_rate": 0.00039977300759283627,
152
+ "loss": 2.6226,
153
+ "step": 12000
154
+ },
155
+ {
156
+ "epoch": 0.49,
157
+ "learning_rate": 0.0003997528441802316,
158
+ "loss": 2.5442,
159
+ "step": 12500
160
+ },
161
+ {
162
+ "epoch": 0.51,
163
+ "learning_rate": 0.0003997318234719313,
164
+ "loss": 2.622,
165
+ "step": 13000
166
+ },
167
+ {
168
+ "epoch": 0.53,
169
+ "learning_rate": 0.0003997099455581518,
170
+ "loss": 2.5825,
171
+ "step": 13500
172
+ },
173
+ {
174
+ "epoch": 0.55,
175
+ "learning_rate": 0.0003996872105327882,
176
+ "loss": 2.6382,
177
+ "step": 14000
178
+ },
179
+ {
180
+ "epoch": 0.57,
181
+ "learning_rate": 0.0003996636184934145,
182
+ "loss": 2.5633,
183
+ "step": 14500
184
+ },
185
+ {
186
+ "epoch": 0.59,
187
+ "learning_rate": 0.00039963916954128235,
188
+ "loss": 2.5505,
189
+ "step": 15000
190
+ },
191
+ {
192
+ "epoch": 0.61,
193
+ "learning_rate": 0.0003996138637813214,
194
+ "loss": 2.5991,
195
+ "step": 15500
196
+ },
197
+ {
198
+ "epoch": 0.63,
199
+ "learning_rate": 0.00039958770132213857,
200
+ "loss": 2.5736,
201
+ "step": 16000
202
+ },
203
+ {
204
+ "epoch": 0.65,
205
+ "learning_rate": 0.00039956068227601734,
206
+ "loss": 2.5436,
207
+ "step": 16500
208
+ },
209
+ {
210
+ "epoch": 0.67,
211
+ "learning_rate": 0.0003995328067589175,
212
+ "loss": 2.5502,
213
+ "step": 17000
214
+ },
215
+ {
216
+ "epoch": 0.69,
217
+ "learning_rate": 0.00039950407489047486,
218
+ "loss": 2.5067,
219
+ "step": 17500
220
+ },
221
+ {
222
+ "epoch": 0.71,
223
+ "learning_rate": 0.00039947448679400027,
224
+ "loss": 2.6009,
225
+ "step": 18000
226
+ },
227
+ {
228
+ "epoch": 0.73,
229
+ "learning_rate": 0.00039944404259647934,
230
+ "loss": 2.5333,
231
+ "step": 18500
232
+ },
233
+ {
234
+ "epoch": 0.75,
235
+ "learning_rate": 0.00039941274242857205,
236
+ "loss": 2.5414,
237
+ "step": 19000
238
+ },
239
+ {
240
+ "epoch": 0.77,
241
+ "learning_rate": 0.00039938058642461174,
242
+ "loss": 2.5632,
243
+ "step": 19500
244
+ },
245
+ {
246
+ "epoch": 0.79,
247
+ "learning_rate": 0.0003993475747226051,
248
+ "loss": 2.5763,
249
+ "step": 20000
250
+ },
251
+ {
252
+ "epoch": 0.81,
253
+ "learning_rate": 0.0003993137074642312,
254
+ "loss": 2.619,
255
+ "step": 20500
256
+ },
257
+ {
258
+ "epoch": 0.83,
259
+ "learning_rate": 0.00039927898479484076,
260
+ "loss": 2.5508,
261
+ "step": 21000
262
+ },
263
+ {
264
+ "epoch": 0.85,
265
+ "learning_rate": 0.0003992434068634561,
266
+ "loss": 2.5927,
267
+ "step": 21500
268
+ },
269
+ {
270
+ "epoch": 0.87,
271
+ "learning_rate": 0.00039920697382276986,
272
+ "loss": 2.5342,
273
+ "step": 22000
274
+ },
275
+ {
276
+ "epoch": 0.89,
277
+ "learning_rate": 0.0003991696858291447,
278
+ "loss": 2.5948,
279
+ "step": 22500
280
+ },
281
+ {
282
+ "epoch": 0.91,
283
+ "learning_rate": 0.0003991315430426126,
284
+ "loss": 2.5893,
285
+ "step": 23000
286
+ },
287
+ {
288
+ "epoch": 0.93,
289
+ "learning_rate": 0.0003990925456268741,
290
+ "loss": 2.5284,
291
+ "step": 23500
292
+ },
293
+ {
294
+ "epoch": 0.95,
295
+ "learning_rate": 0.0003990526937492976,
296
+ "loss": 2.5097,
297
+ "step": 24000
298
+ },
299
+ {
300
+ "epoch": 0.97,
301
+ "learning_rate": 0.0003990119875809187,
302
+ "loss": 2.6065,
303
+ "step": 24500
304
+ },
305
+ {
306
+ "epoch": 0.99,
307
+ "learning_rate": 0.00039897042729643934,
308
+ "loss": 2.5628,
309
+ "step": 25000
310
+ },
311
  {
312
  "epoch": 1.0,
313
  "eval_bleu": 1.0,
314
  "eval_brevity_penalty": 1.0,
315
  "eval_length_ratio": 1.0,
316
+ "eval_loss": 2.452415943145752,
317
  "eval_precisions": [
318
  1.0,
319
  1.0,
 
321
  1.0
322
  ],
323
  "eval_reference_length": 1439232,
324
+ "eval_runtime": 2451.7899,
325
+ "eval_samples_per_second": 1.147,
326
+ "eval_steps_per_second": 1.147,
327
  "eval_translation_length": 1439232,
328
+ "step": 25291
329
  }
330
  ],
331
  "logging_steps": 500,
332
+ "max_steps": 758730,
333
+ "num_train_epochs": 30,
334
  "save_steps": 500,
335
  "total_flos": 2.1666322696686797e+17,
336
  "trial_name": null,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e3543db1b51c5d7f6f63f268f46bee0582a3b0fa6f02c3500965ec94f79e65a
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703f54bdd3d2d84d639a2e91c0daab39fba039bc62b2aa3775d8910005bb9a6c
3
  size 4728