ramdhanfirdaus commited on
Commit
c982031
1 Parent(s): 3d8c9be

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -216,11 +216,4 @@ The following `bitsandbytes` quantization config was used during training:
216
  ### Framework versions
217
 
218
 
219
- - PEFT 0.6.0.dev0
220
- ## Training procedure
221
-
222
-
223
- ### Framework versions
224
-
225
-
226
  - PEFT 0.6.0.dev0
 
216
  ### Framework versions
217
 
218
 
 
 
 
 
 
 
 
219
  - PEFT 0.6.0.dev0
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f3499658af1ccf0f719ffe4e8ecf336e15511850781e24c21a8cc7dbc131f3d
3
  size 50349441
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df092317a6de4386d50958e2ce32691d5b83c6cb27896c4612f89f0c4f2a8b7
3
  size 50349441
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:531455402dce213cbf7f3d865f33cb3a680459ca9ebf797405993b81b39a1e2d
3
- size 100691721
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e297c3d71f9dce8e4b5889787cb7eb1c3327ce0b17bed66630796419656d52d
3
+ size 100693001
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1389c8fb4da5b08654d5ecff1c857ae43115866bad194f9289834fc2a0b6a9c8
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eac30a54a346eb77c88df167a4adbf13ea272bd1dd65fd9f0292631c36fab66
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0410ad60e6d0138b921a03b0a3e367fd27c6ab07cb9a5006fcb66ea8e5bbacc4
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1238b3bfa75e49a19396161e9e7b72ab89cdd1a3f63b51c0ab4d6e8d216c5a5
3
  size 627
last-checkpoint/special_tokens_map.json CHANGED
@@ -1,24 +1,6 @@
1
  {
2
- "bos_token": {
3
- "content": "<|endoftext|>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "<|endoftext|>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
  "pad_token": "<|endoftext|>",
17
- "unk_token": {
18
- "content": "<|endoftext|>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- }
24
  }
 
1
  {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
4
  "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
6
  }
last-checkpoint/tokenizer_config.json CHANGED
@@ -13,12 +13,8 @@
13
  "bos_token": "<|endoftext|>",
14
  "clean_up_tokenization_spaces": true,
15
  "eos_token": "<|endoftext|>",
16
- "max_length": 512,
17
  "model_max_length": 1024,
18
  "pad_token": "<|endoftext|>",
19
- "stride": 0,
20
  "tokenizer_class": "GPT2Tokenizer",
21
- "truncation_side": "right",
22
- "truncation_strategy": "longest_first",
23
  "unk_token": "<|endoftext|>"
24
  }
 
13
  "bos_token": "<|endoftext|>",
14
  "clean_up_tokenization_spaces": true,
15
  "eos_token": "<|endoftext|>",
 
16
  "model_max_length": 1024,
17
  "pad_token": "<|endoftext|>",
 
18
  "tokenizer_class": "GPT2Tokenizer",
 
 
19
  "unk_token": "<|endoftext|>"
20
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.4988901615142822,
3
- "best_model_checkpoint": "./outputs/checkpoint-1700",
4
- "epoch": 1.238615664845173,
5
  "eval_steps": 100,
6
- "global_step": 1700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -151,107 +151,163 @@
151
  {
152
  "epoch": 0.8,
153
  "learning_rate": 0.0002,
154
- "loss": 1.6461,
155
  "step": 1100
156
  },
157
  {
158
  "epoch": 0.8,
159
- "eval_loss": 1.6240431070327759,
160
- "eval_runtime": 293.5989,
161
- "eval_samples_per_second": 21.369,
162
- "eval_steps_per_second": 2.674,
163
  "step": 1100
164
  },
165
  {
166
  "epoch": 0.87,
167
  "learning_rate": 0.0002,
168
- "loss": 1.5992,
169
  "step": 1200
170
  },
171
  {
172
  "epoch": 0.87,
173
- "eval_loss": 1.5974311828613281,
174
- "eval_runtime": 291.7,
175
- "eval_samples_per_second": 21.508,
176
- "eval_steps_per_second": 2.691,
177
  "step": 1200
178
  },
179
  {
180
  "epoch": 0.95,
181
  "learning_rate": 0.0002,
182
- "loss": 1.6021,
183
  "step": 1300
184
  },
185
  {
186
  "epoch": 0.95,
187
- "eval_loss": 1.5751127004623413,
188
- "eval_runtime": 289.9524,
189
- "eval_samples_per_second": 21.638,
190
- "eval_steps_per_second": 2.707,
191
  "step": 1300
192
  },
193
  {
194
  "epoch": 1.02,
195
  "learning_rate": 0.0002,
196
- "loss": 1.5538,
197
  "step": 1400
198
  },
199
  {
200
  "epoch": 1.02,
201
- "eval_loss": 1.5539450645446777,
202
- "eval_runtime": 287.8748,
203
- "eval_samples_per_second": 21.794,
204
- "eval_steps_per_second": 2.727,
205
  "step": 1400
206
  },
207
  {
208
  "epoch": 1.09,
209
  "learning_rate": 0.0002,
210
- "loss": 1.5249,
211
  "step": 1500
212
  },
213
  {
214
  "epoch": 1.09,
215
- "eval_loss": 1.5348094701766968,
216
- "eval_runtime": 287.891,
217
- "eval_samples_per_second": 21.793,
218
- "eval_steps_per_second": 2.727,
219
  "step": 1500
220
  },
221
  {
222
  "epoch": 1.17,
223
  "learning_rate": 0.0002,
224
- "loss": 1.506,
225
  "step": 1600
226
  },
227
  {
228
  "epoch": 1.17,
229
- "eval_loss": 1.515953540802002,
230
- "eval_runtime": 289.836,
231
- "eval_samples_per_second": 21.647,
232
- "eval_steps_per_second": 2.708,
233
  "step": 1600
234
  },
235
  {
236
  "epoch": 1.24,
237
  "learning_rate": 0.0002,
238
- "loss": 1.5042,
239
  "step": 1700
240
  },
241
  {
242
  "epoch": 1.24,
243
- "eval_loss": 1.4988901615142822,
244
- "eval_runtime": 291.5471,
245
- "eval_samples_per_second": 21.52,
246
- "eval_steps_per_second": 2.693,
247
  "step": 1700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  }
249
  ],
250
  "logging_steps": 100,
251
  "max_steps": 4116,
252
  "num_train_epochs": 3,
253
  "save_steps": 100,
254
- "total_flos": 1.0135854358789325e+17,
255
  "trial_name": null,
256
  "trial_params": null
257
  }
 
1
  {
2
+ "best_metric": 1.4408069849014282,
3
+ "best_model_checkpoint": "./outputs/checkpoint-2100",
4
+ "epoch": 1.530054644808743,
5
  "eval_steps": 100,
6
+ "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
151
  {
152
  "epoch": 0.8,
153
  "learning_rate": 0.0002,
154
+ "loss": 1.6268,
155
  "step": 1100
156
  },
157
  {
158
  "epoch": 0.8,
159
+ "eval_loss": 1.6125303506851196,
160
+ "eval_runtime": 144.0648,
161
+ "eval_samples_per_second": 43.55,
162
+ "eval_steps_per_second": 5.449,
163
  "step": 1100
164
  },
165
  {
166
  "epoch": 0.87,
167
  "learning_rate": 0.0002,
168
+ "loss": 1.5911,
169
  "step": 1200
170
  },
171
  {
172
  "epoch": 0.87,
173
+ "eval_loss": 1.5925209522247314,
174
+ "eval_runtime": 144.042,
175
+ "eval_samples_per_second": 43.557,
176
+ "eval_steps_per_second": 5.45,
177
  "step": 1200
178
  },
179
  {
180
  "epoch": 0.95,
181
  "learning_rate": 0.0002,
182
+ "loss": 1.5986,
183
  "step": 1300
184
  },
185
  {
186
  "epoch": 0.95,
187
+ "eval_loss": 1.571681261062622,
188
+ "eval_runtime": 144.1746,
189
+ "eval_samples_per_second": 43.517,
190
+ "eval_steps_per_second": 5.445,
191
  "step": 1300
192
  },
193
  {
194
  "epoch": 1.02,
195
  "learning_rate": 0.0002,
196
+ "loss": 1.5514,
197
  "step": 1400
198
  },
199
  {
200
  "epoch": 1.02,
201
+ "eval_loss": 1.5524405241012573,
202
+ "eval_runtime": 183.3697,
203
+ "eval_samples_per_second": 34.215,
204
+ "eval_steps_per_second": 4.281,
205
  "step": 1400
206
  },
207
  {
208
  "epoch": 1.09,
209
  "learning_rate": 0.0002,
210
+ "loss": 1.5235,
211
  "step": 1500
212
  },
213
  {
214
  "epoch": 1.09,
215
+ "eval_loss": 1.534788966178894,
216
+ "eval_runtime": 317.3733,
217
+ "eval_samples_per_second": 19.769,
218
+ "eval_steps_per_second": 2.473,
219
  "step": 1500
220
  },
221
  {
222
  "epoch": 1.17,
223
  "learning_rate": 0.0002,
224
+ "loss": 1.5051,
225
  "step": 1600
226
  },
227
  {
228
  "epoch": 1.17,
229
+ "eval_loss": 1.517040491104126,
230
+ "eval_runtime": 315.5897,
231
+ "eval_samples_per_second": 19.88,
232
+ "eval_steps_per_second": 2.487,
233
  "step": 1600
234
  },
235
  {
236
  "epoch": 1.24,
237
  "learning_rate": 0.0002,
238
+ "loss": 1.5036,
239
  "step": 1700
240
  },
241
  {
242
  "epoch": 1.24,
243
+ "eval_loss": 1.500235915184021,
244
+ "eval_runtime": 314.1201,
245
+ "eval_samples_per_second": 19.973,
246
+ "eval_steps_per_second": 2.499,
247
  "step": 1700
248
+ },
249
+ {
250
+ "epoch": 1.31,
251
+ "learning_rate": 0.0002,
252
+ "loss": 1.4767,
253
+ "step": 1800
254
+ },
255
+ {
256
+ "epoch": 1.31,
257
+ "eval_loss": 1.4854458570480347,
258
+ "eval_runtime": 313.3904,
259
+ "eval_samples_per_second": 20.02,
260
+ "eval_steps_per_second": 2.505,
261
+ "step": 1800
262
+ },
263
+ {
264
+ "epoch": 1.38,
265
+ "learning_rate": 0.0002,
266
+ "loss": 1.4665,
267
+ "step": 1900
268
+ },
269
+ {
270
+ "epoch": 1.38,
271
+ "eval_loss": 1.4697930812835693,
272
+ "eval_runtime": 314.584,
273
+ "eval_samples_per_second": 19.944,
274
+ "eval_steps_per_second": 2.495,
275
+ "step": 1900
276
+ },
277
+ {
278
+ "epoch": 1.46,
279
+ "learning_rate": 0.0002,
280
+ "loss": 1.4498,
281
+ "step": 2000
282
+ },
283
+ {
284
+ "epoch": 1.46,
285
+ "eval_loss": 1.456108808517456,
286
+ "eval_runtime": 316.2748,
287
+ "eval_samples_per_second": 19.837,
288
+ "eval_steps_per_second": 2.482,
289
+ "step": 2000
290
+ },
291
+ {
292
+ "epoch": 1.53,
293
+ "learning_rate": 0.0002,
294
+ "loss": 1.4358,
295
+ "step": 2100
296
+ },
297
+ {
298
+ "epoch": 1.53,
299
+ "eval_loss": 1.4408069849014282,
300
+ "eval_runtime": 317.862,
301
+ "eval_samples_per_second": 19.738,
302
+ "eval_steps_per_second": 2.47,
303
+ "step": 2100
304
  }
305
  ],
306
  "logging_steps": 100,
307
  "max_steps": 4116,
308
  "num_train_epochs": 3,
309
  "save_steps": 100,
310
+ "total_flos": 1.2520111653502157e+17,
311
  "trial_name": null,
312
  "trial_params": null
313
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1ab4a714067324690a64db56d021644d50462360424c147b3e9df6b69650fa0
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:435ef416a520c327b44f1a335ae059bdb8b9a978d39dfecd5bff01684de2670c
3
  size 4155