plip commited on
Commit
7a7d43b
1 Parent(s): 9d5e026

Training in progress, step 220000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfa0ab32e38b01ddbc573768dfaf0dfddabcc8a3d70f2bee44cb82fdf48bdbfb
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ac76e1d220f6d8cb3dc3743f45e80298ffddaf4ef80817514d248746af9821
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8385ab1df2ebd2cf00e4d352e48666514cc5adaf924696c86b3104a741aaa958
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb76675226c565779dfeb3a8b0d92c90ded4e386d4778c4ecac74f673efb538
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43a0a20032cd9b1693c0df17b5ce36ac0de1b8a71ec2dce02e457215ca17de74
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd1d2a3e2335707f39d021f603fb4d73c951356a2d333cf3eecb4bcaf66a94d5
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab74f841af25583fde1ca8fdb0c1861852ed361e9321c893b9e5b237758c199
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b649e22628c0243a3802b9e8565a7279841c797cd164e62e5f4790c3cae8210
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:064bb77ca4a934e34a95ea9287f27a0ff8cd1d559261fa08d942aff78c8d69b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f83c397386f43a750816cef91f85bfbfcd5ca30567987fd3bd04ea4161c050ff
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebe0a4f01c3d254f50684a93345a2b350eaf4ce2ffbfd3ab184e7c48ab1cbf8f
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75b5752b21c419f95766e4dd33840bff9a6347069f10f102c30daeec2edb1db8
3
+ size 14439
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4028538ec5575e9eae4ab14227f42ba25a4d5ad871aeb0a0910dce83388d0885
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a80ec1267650e6ad947a8c57465e241d88a3ac6ff26b577e91402626f8216828
3
+ size 14439
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53cbe542019d2f74f171c2183d760b9998750b9b6316341cb64d43d534f9c788
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e22cc7a3e8d006a7f53d4c0ba60293af6361e5d675b3152011d5ae5ff1bc470b
3
+ size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:893d43631edd1f00cc9f737c881bbde999bddaa7e8c8c4da0adc745254b97b30
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b096aeedbe1821683df58f9029fa8bc78db67ff58d60f8a0d3d16c91cb58a4cc
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:537d3cd36d12dd1a3735216763e1c615e8f3bdd474a41fbef3b6c5cb46612706
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7eaa4432fc6e41f9da719c55df3fb2ad8c3924f89bf79f109b21a31d039a089
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de6aa60c12b8dbcda06c27a65ac30e45d66753f2960bee09a7c0ea364ae7e103
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05d01a7b2f868e6f0e645a01ef2a1ba3ad5ea16b1bff8e9cf0cf653b106d64f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.434020650142467,
5
- "global_step": 210000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4206,11 +4206,211 @@
4206
  "eval_samples_per_second": 1863.216,
4207
  "eval_steps_per_second": 29.811,
4208
  "step": 210000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4209
  }
4210
  ],
4211
  "max_steps": 500000,
4212
  "num_train_epochs": 16,
4213
- "total_flos": 6.709181986552645e+21,
4214
  "trial_name": null,
4215
  "trial_params": null
4216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.740402585863538,
5
+ "global_step": 220000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4206
  "eval_samples_per_second": 1863.216,
4207
  "eval_steps_per_second": 29.811,
4208
  "step": 210000
4209
+ },
4210
+ {
4211
+ "epoch": 6.45,
4212
+ "learning_rate": 0.00020389135229309803,
4213
+ "loss": 0.3433,
4214
+ "step": 210500
4215
+ },
4216
+ {
4217
+ "epoch": 6.46,
4218
+ "learning_rate": 0.00020343965962416229,
4219
+ "loss": 0.3438,
4220
+ "step": 211000
4221
+ },
4222
+ {
4223
+ "epoch": 6.46,
4224
+ "eval_loss": 0.7825648784637451,
4225
+ "eval_runtime": 0.5312,
4226
+ "eval_samples_per_second": 1882.622,
4227
+ "eval_steps_per_second": 30.122,
4228
+ "step": 211000
4229
+ },
4230
+ {
4231
+ "epoch": 6.48,
4232
+ "learning_rate": 0.00020298743722686958,
4233
+ "loss": 0.3442,
4234
+ "step": 211500
4235
+ },
4236
+ {
4237
+ "epoch": 6.5,
4238
+ "learning_rate": 0.0002025346900466516,
4239
+ "loss": 0.3447,
4240
+ "step": 212000
4241
+ },
4242
+ {
4243
+ "epoch": 6.5,
4244
+ "eval_loss": 0.7843804955482483,
4245
+ "eval_runtime": 0.5233,
4246
+ "eval_samples_per_second": 1910.972,
4247
+ "eval_steps_per_second": 30.576,
4248
+ "step": 212000
4249
+ },
4250
+ {
4251
+ "epoch": 6.51,
4252
+ "learning_rate": 0.0002020814230346791,
4253
+ "loss": 0.3428,
4254
+ "step": 212500
4255
+ },
4256
+ {
4257
+ "epoch": 6.53,
4258
+ "learning_rate": 0.00020162764114780733,
4259
+ "loss": 0.3427,
4260
+ "step": 213000
4261
+ },
4262
+ {
4263
+ "epoch": 6.53,
4264
+ "eval_loss": 0.7835332155227661,
4265
+ "eval_runtime": 0.5169,
4266
+ "eval_samples_per_second": 1934.661,
4267
+ "eval_steps_per_second": 30.955,
4268
+ "step": 213000
4269
+ },
4270
+ {
4271
+ "epoch": 6.54,
4272
+ "learning_rate": 0.0002011733493485224,
4273
+ "loss": 0.3424,
4274
+ "step": 213500
4275
+ },
4276
+ {
4277
+ "epoch": 6.56,
4278
+ "learning_rate": 0.00020071855260488664,
4279
+ "loss": 0.3424,
4280
+ "step": 214000
4281
+ },
4282
+ {
4283
+ "epoch": 6.56,
4284
+ "eval_loss": 0.7858285903930664,
4285
+ "eval_runtime": 0.5163,
4286
+ "eval_samples_per_second": 1936.764,
4287
+ "eval_steps_per_second": 30.988,
4288
+ "step": 214000
4289
+ },
4290
+ {
4291
+ "epoch": 6.57,
4292
+ "learning_rate": 0.0002002632558904843,
4293
+ "loss": 0.3424,
4294
+ "step": 214500
4295
+ },
4296
+ {
4297
+ "epoch": 6.59,
4298
+ "learning_rate": 0.00019980746418436736,
4299
+ "loss": 0.3427,
4300
+ "step": 215000
4301
+ },
4302
+ {
4303
+ "epoch": 6.59,
4304
+ "eval_loss": 0.7807540893554688,
4305
+ "eval_runtime": 0.5276,
4306
+ "eval_samples_per_second": 1895.318,
4307
+ "eval_steps_per_second": 30.325,
4308
+ "step": 215000
4309
+ },
4310
+ {
4311
+ "epoch": 6.6,
4312
+ "learning_rate": 0.00019935118247100088,
4313
+ "loss": 0.3422,
4314
+ "step": 215500
4315
+ },
4316
+ {
4317
+ "epoch": 6.62,
4318
+ "learning_rate": 0.00019889441574020864,
4319
+ "loss": 0.3692,
4320
+ "step": 216000
4321
+ },
4322
+ {
4323
+ "epoch": 6.62,
4324
+ "eval_loss": 0.9048200845718384,
4325
+ "eval_runtime": 0.5244,
4326
+ "eval_samples_per_second": 1906.815,
4327
+ "eval_steps_per_second": 30.509,
4328
+ "step": 216000
4329
+ },
4330
+ {
4331
+ "epoch": 6.63,
4332
+ "learning_rate": 0.0001984371689871183,
4333
+ "loss": 0.7372,
4334
+ "step": 216500
4335
+ },
4336
+ {
4337
+ "epoch": 6.65,
4338
+ "learning_rate": 0.00019797944721210725,
4339
+ "loss": 0.7368,
4340
+ "step": 217000
4341
+ },
4342
+ {
4343
+ "epoch": 6.65,
4344
+ "eval_loss": 0.8997135162353516,
4345
+ "eval_runtime": 0.5259,
4346
+ "eval_samples_per_second": 1901.634,
4347
+ "eval_steps_per_second": 30.426,
4348
+ "step": 217000
4349
+ },
4350
+ {
4351
+ "epoch": 6.66,
4352
+ "learning_rate": 0.00019752125542074736,
4353
+ "loss": 0.7368,
4354
+ "step": 217500
4355
+ },
4356
+ {
4357
+ "epoch": 6.68,
4358
+ "learning_rate": 0.00019706259862375074,
4359
+ "loss": 0.7368,
4360
+ "step": 218000
4361
+ },
4362
+ {
4363
+ "epoch": 6.68,
4364
+ "eval_loss": 0.900769829750061,
4365
+ "eval_runtime": 0.5434,
4366
+ "eval_samples_per_second": 1840.131,
4367
+ "eval_steps_per_second": 29.442,
4368
+ "step": 218000
4369
+ },
4370
+ {
4371
+ "epoch": 6.69,
4372
+ "learning_rate": 0.00019660348183691453,
4373
+ "loss": 0.7368,
4374
+ "step": 218500
4375
+ },
4376
+ {
4377
+ "epoch": 6.71,
4378
+ "learning_rate": 0.0001961439100810664,
4379
+ "loss": 0.7367,
4380
+ "step": 219000
4381
+ },
4382
+ {
4383
+ "epoch": 6.71,
4384
+ "eval_loss": 0.8999822735786438,
4385
+ "eval_runtime": 0.5136,
4386
+ "eval_samples_per_second": 1947.174,
4387
+ "eval_steps_per_second": 31.155,
4388
+ "step": 219000
4389
+ },
4390
+ {
4391
+ "epoch": 6.73,
4392
+ "learning_rate": 0.00019568388838200952,
4393
+ "loss": 0.7368,
4394
+ "step": 219500
4395
+ },
4396
+ {
4397
+ "epoch": 6.74,
4398
+ "learning_rate": 0.00019522342177046744,
4399
+ "loss": 0.7368,
4400
+ "step": 220000
4401
+ },
4402
+ {
4403
+ "epoch": 6.74,
4404
+ "eval_loss": 0.9001851081848145,
4405
+ "eval_runtime": 0.5249,
4406
+ "eval_samples_per_second": 1904.969,
4407
+ "eval_steps_per_second": 30.48,
4408
+ "step": 220000
4409
  }
4410
  ],
4411
  "max_steps": 500000,
4412
  "num_train_epochs": 16,
4413
+ "total_flos": 7.028669125327145e+21,
4414
  "trial_name": null,
4415
  "trial_params": null
4416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8385ab1df2ebd2cf00e4d352e48666514cc5adaf924696c86b3104a741aaa958
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb76675226c565779dfeb3a8b0d92c90ded4e386d4778c4ecac74f673efb538
3
  size 102501541