dq158 committed on
Commit
7425b6a
1 Parent(s): 3087a0d

Training in progress, step 25000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:836cfae75cc4e0a96e10094df88438cae683a37de38a069fc93d3c052ab11376
3
  size 18915040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1162f3dd69f25f8696965e77a13ee78f76a56faa207df54198db0aa2c1ff8d34
3
  size 18915040
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e18baf51eaeff3b682fafb60aa656bf58fa9f0d9c1f4c498dbcbe8e4b4ee56e3
3
  size 37990394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a392000d9293e83d945e935d13a886a99f359ba416db00324a34f83ac689a60
3
  size 37990394
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70477ead05bce581876eb5d73e88cb3d8565a2290d6f8c1491b79ba699fb1d56
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:052cb623d333c55cb6aa932d620c4240d6da273a0db3bf72bfdbcc2be7693707
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:546e6ba6ca3229060626f9c187788612a8a7a0657f68d7b81ddac25c2e83e7b7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c007ea3613e729374f49a992d59f46f0bb1762290dd86d4c3105289d354272e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5813699984012325,
5
  "eval_steps": 500,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -247,13 +247,73 @@
247
  "learning_rate": 7.75878444599598e-05,
248
  "loss": 3.2134,
249
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  }
251
  ],
252
  "logging_steps": 500,
253
  "max_steps": 172005,
254
  "num_train_epochs": 5,
255
  "save_steps": 5000,
256
- "total_flos": 6.8534491152384e+17,
257
  "trial_name": null,
258
  "trial_params": null
259
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7267124980015406,
5
  "eval_steps": 500,
6
+ "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
247
  "learning_rate": 7.75878444599598e-05,
248
  "loss": 3.2134,
249
  "step": 20000
250
+ },
251
+ {
252
+ "epoch": 0.6,
253
+ "learning_rate": 7.746059656145306e-05,
254
+ "loss": 3.1633,
255
+ "step": 20500
256
+ },
257
+ {
258
+ "epoch": 0.61,
259
+ "learning_rate": 7.733018788656199e-05,
260
+ "loss": 3.2601,
261
+ "step": 21000
262
+ },
263
+ {
264
+ "epoch": 0.62,
265
+ "learning_rate": 7.71966294386527e-05,
266
+ "loss": 3.173,
267
+ "step": 21500
268
+ },
269
+ {
270
+ "epoch": 0.64,
271
+ "learning_rate": 7.70599324868567e-05,
272
+ "loss": 3.2264,
273
+ "step": 22000
274
+ },
275
+ {
276
+ "epoch": 0.65,
277
+ "learning_rate": 7.692010856511996e-05,
278
+ "loss": 3.1828,
279
+ "step": 22500
280
+ },
281
+ {
282
+ "epoch": 0.67,
283
+ "learning_rate": 7.677716947122976e-05,
284
+ "loss": 3.1522,
285
+ "step": 23000
286
+ },
287
+ {
288
+ "epoch": 0.68,
289
+ "learning_rate": 7.663112726581924e-05,
290
+ "loss": 3.2148,
291
+ "step": 23500
292
+ },
293
+ {
294
+ "epoch": 0.7,
295
+ "learning_rate": 7.648199427134978e-05,
296
+ "loss": 3.1741,
297
+ "step": 24000
298
+ },
299
+ {
300
+ "epoch": 0.71,
301
+ "learning_rate": 7.632978307107125e-05,
302
+ "loss": 3.2386,
303
+ "step": 24500
304
+ },
305
+ {
306
+ "epoch": 0.73,
307
+ "learning_rate": 7.617450650796032e-05,
308
+ "loss": 3.1865,
309
+ "step": 25000
310
  }
311
  ],
312
  "logging_steps": 500,
313
  "max_steps": 172005,
314
  "num_train_epochs": 5,
315
  "save_steps": 5000,
316
+ "total_flos": 8.566811394048e+17,
317
  "trial_name": null,
318
  "trial_params": null
319
  }