plip commited on
Commit
7851714
1 Parent(s): 3bcf721

Training in progress, step 220000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:378ba8062a28aa965b8015e80cac5a7e8c210f8eae62a9b35de790172a138b2c
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c0ec040ee2456e61ab40311e486f10de42e153791678bf443f54fc8893f828
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:500bbe867ded0d4e115c70f239c6f305c19492ee5158d2b7bb72cd84b524f5aa
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027aff93cad77a00849e3e1032a09ce43e2d9d45511b5e4cbbe8559338ed73ad
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b27d50a3b069272092971e132d0f9ff41c52313b7354d6ff1c765fa05ea9a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff626d24667d1ce6888ac34600db16c7779e3e7daa61581a1c15d556bb284dd
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b27d50a3b069272092971e132d0f9ff41c52313b7354d6ff1c765fa05ea9a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff626d24667d1ce6888ac34600db16c7779e3e7daa61581a1c15d556bb284dd
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b27d50a3b069272092971e132d0f9ff41c52313b7354d6ff1c765fa05ea9a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff626d24667d1ce6888ac34600db16c7779e3e7daa61581a1c15d556bb284dd
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b27d50a3b069272092971e132d0f9ff41c52313b7354d6ff1c765fa05ea9a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff626d24667d1ce6888ac34600db16c7779e3e7daa61581a1c15d556bb284dd
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b27d50a3b069272092971e132d0f9ff41c52313b7354d6ff1c765fa05ea9a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff626d24667d1ce6888ac34600db16c7779e3e7daa61581a1c15d556bb284dd
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b27d50a3b069272092971e132d0f9ff41c52313b7354d6ff1c765fa05ea9a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff626d24667d1ce6888ac34600db16c7779e3e7daa61581a1c15d556bb284dd
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b27d50a3b069272092971e132d0f9ff41c52313b7354d6ff1c765fa05ea9a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff626d24667d1ce6888ac34600db16c7779e3e7daa61581a1c15d556bb284dd
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56b27d50a3b069272092971e132d0f9ff41c52313b7354d6ff1c765fa05ea9a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff626d24667d1ce6888ac34600db16c7779e3e7daa61581a1c15d556bb284dd
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de6aa60c12b8dbcda06c27a65ac30e45d66753f2960bee09a7c0ea364ae7e103
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05d01a7b2f868e6f0e645a01ef2a1ba3ad5ea16b1bff8e9cf0cf653b106d64f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.351954737754218,
5
- "global_step": 210000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4206,11 +4206,211 @@
4206
  "eval_samples_per_second": 732.046,
4207
  "eval_steps_per_second": 11.713,
4208
  "step": 210000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4209
  }
4210
  ],
4211
  "max_steps": 500000,
4212
  "num_train_epochs": 13,
4213
- "total_flos": 6.709162519699275e+21,
4214
  "trial_name": null,
4215
  "trial_params": null
4216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.606809725266324,
5
+ "global_step": 220000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4206
  "eval_samples_per_second": 732.046,
4207
  "eval_steps_per_second": 11.713,
4208
  "step": 210000
4209
+ },
4210
+ {
4211
+ "epoch": 5.36,
4212
+ "learning_rate": 0.00020389135229309803,
4213
+ "loss": 0.2923,
4214
+ "step": 210500
4215
+ },
4216
+ {
4217
+ "epoch": 5.38,
4218
+ "learning_rate": 0.00020343965962416229,
4219
+ "loss": 0.2923,
4220
+ "step": 211000
4221
+ },
4222
+ {
4223
+ "epoch": 5.38,
4224
+ "eval_loss": 0.8264955878257751,
4225
+ "eval_runtime": 1.329,
4226
+ "eval_samples_per_second": 752.449,
4227
+ "eval_steps_per_second": 12.039,
4228
+ "step": 211000
4229
+ },
4230
+ {
4231
+ "epoch": 5.39,
4232
+ "learning_rate": 0.00020298743722686958,
4233
+ "loss": 0.2922,
4234
+ "step": 211500
4235
+ },
4236
+ {
4237
+ "epoch": 5.4,
4238
+ "learning_rate": 0.0002025346900466516,
4239
+ "loss": 0.2925,
4240
+ "step": 212000
4241
+ },
4242
+ {
4243
+ "epoch": 5.4,
4244
+ "eval_loss": 0.826949954032898,
4245
+ "eval_runtime": 1.2977,
4246
+ "eval_samples_per_second": 770.59,
4247
+ "eval_steps_per_second": 12.329,
4248
+ "step": 212000
4249
+ },
4250
+ {
4251
+ "epoch": 5.42,
4252
+ "learning_rate": 0.0002020814230346791,
4253
+ "loss": 0.2932,
4254
+ "step": 212500
4255
+ },
4256
+ {
4257
+ "epoch": 5.43,
4258
+ "learning_rate": 0.00020162764114780733,
4259
+ "loss": 0.2926,
4260
+ "step": 213000
4261
+ },
4262
+ {
4263
+ "epoch": 5.43,
4264
+ "eval_loss": 0.8337197303771973,
4265
+ "eval_runtime": 1.293,
4266
+ "eval_samples_per_second": 773.389,
4267
+ "eval_steps_per_second": 12.374,
4268
+ "step": 213000
4269
+ },
4270
+ {
4271
+ "epoch": 5.44,
4272
+ "learning_rate": 0.0002011733493485224,
4273
+ "loss": 0.2923,
4274
+ "step": 213500
4275
+ },
4276
+ {
4277
+ "epoch": 5.45,
4278
+ "learning_rate": 0.00020071855260488664,
4279
+ "loss": 0.292,
4280
+ "step": 214000
4281
+ },
4282
+ {
4283
+ "epoch": 5.45,
4284
+ "eval_loss": 0.8254857063293457,
4285
+ "eval_runtime": 1.2323,
4286
+ "eval_samples_per_second": 811.466,
4287
+ "eval_steps_per_second": 12.983,
4288
+ "step": 214000
4289
+ },
4290
+ {
4291
+ "epoch": 5.47,
4292
+ "learning_rate": 0.0002002632558904843,
4293
+ "loss": 0.2919,
4294
+ "step": 214500
4295
+ },
4296
+ {
4297
+ "epoch": 5.48,
4298
+ "learning_rate": 0.00019980746418436736,
4299
+ "loss": 0.292,
4300
+ "step": 215000
4301
+ },
4302
+ {
4303
+ "epoch": 5.48,
4304
+ "eval_loss": 0.8224061131477356,
4305
+ "eval_runtime": 1.2655,
4306
+ "eval_samples_per_second": 790.229,
4307
+ "eval_steps_per_second": 12.644,
4308
+ "step": 215000
4309
+ },
4310
+ {
4311
+ "epoch": 5.49,
4312
+ "learning_rate": 0.00019935118247100088,
4313
+ "loss": 0.2919,
4314
+ "step": 215500
4315
+ },
4316
+ {
4317
+ "epoch": 5.5,
4318
+ "learning_rate": 0.00019889441574020864,
4319
+ "loss": 0.2915,
4320
+ "step": 216000
4321
+ },
4322
+ {
4323
+ "epoch": 5.5,
4324
+ "eval_loss": 0.8216863870620728,
4325
+ "eval_runtime": 1.2862,
4326
+ "eval_samples_per_second": 777.468,
4327
+ "eval_steps_per_second": 12.439,
4328
+ "step": 216000
4329
+ },
4330
+ {
4331
+ "epoch": 5.52,
4332
+ "learning_rate": 0.0001984371689871183,
4333
+ "loss": 0.2918,
4334
+ "step": 216500
4335
+ },
4336
+ {
4337
+ "epoch": 5.53,
4338
+ "learning_rate": 0.00019797944721210725,
4339
+ "loss": 0.2916,
4340
+ "step": 217000
4341
+ },
4342
+ {
4343
+ "epoch": 5.53,
4344
+ "eval_loss": 0.8251473307609558,
4345
+ "eval_runtime": 1.3615,
4346
+ "eval_samples_per_second": 734.51,
4347
+ "eval_steps_per_second": 11.752,
4348
+ "step": 217000
4349
+ },
4350
+ {
4351
+ "epoch": 5.54,
4352
+ "learning_rate": 0.00019752125542074736,
4353
+ "loss": 0.2913,
4354
+ "step": 217500
4355
+ },
4356
+ {
4357
+ "epoch": 5.56,
4358
+ "learning_rate": 0.00019706259862375074,
4359
+ "loss": 0.291,
4360
+ "step": 218000
4361
+ },
4362
+ {
4363
+ "epoch": 5.56,
4364
+ "eval_loss": 0.8243631720542908,
4365
+ "eval_runtime": 1.2451,
4366
+ "eval_samples_per_second": 803.116,
4367
+ "eval_steps_per_second": 12.85,
4368
+ "step": 218000
4369
+ },
4370
+ {
4371
+ "epoch": 5.57,
4372
+ "learning_rate": 0.00019660348183691453,
4373
+ "loss": 0.292,
4374
+ "step": 218500
4375
+ },
4376
+ {
4377
+ "epoch": 5.58,
4378
+ "learning_rate": 0.0001961439100810664,
4379
+ "loss": 0.2918,
4380
+ "step": 219000
4381
+ },
4382
+ {
4383
+ "epoch": 5.58,
4384
+ "eval_loss": 0.8229220509529114,
4385
+ "eval_runtime": 1.3199,
4386
+ "eval_samples_per_second": 757.608,
4387
+ "eval_steps_per_second": 12.122,
4388
+ "step": 219000
4389
+ },
4390
+ {
4391
+ "epoch": 5.59,
4392
+ "learning_rate": 0.00019568388838200952,
4393
+ "loss": 0.2916,
4394
+ "step": 219500
4395
+ },
4396
+ {
4397
+ "epoch": 5.61,
4398
+ "learning_rate": 0.00019522342177046744,
4399
+ "loss": 0.2911,
4400
+ "step": 220000
4401
+ },
4402
+ {
4403
+ "epoch": 5.61,
4404
+ "eval_loss": 0.824487566947937,
4405
+ "eval_runtime": 1.2486,
4406
+ "eval_samples_per_second": 800.922,
4407
+ "eval_steps_per_second": 12.815,
4408
+ "step": 220000
4409
  }
4410
  ],
4411
  "max_steps": 500000,
4412
  "num_train_epochs": 13,
4413
+ "total_flos": 7.028649658473775e+21,
4414
  "trial_name": null,
4415
  "trial_params": null
4416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:500bbe867ded0d4e115c70f239c6f305c19492ee5158d2b7bb72cd84b524f5aa
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027aff93cad77a00849e3e1032a09ce43e2d9d45511b5e4cbbe8559338ed73ad
3
  size 102501541