plip committed on
Commit
9156c9d
1 Parent(s): c3f7238

Training in progress, step 120000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5077b4837e7db854bd139cb7b83c4884833063cbbcc0c135891079eb84eb5023
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0147a34f11e7c1f08c8f5a1a7b664f7685f6024ff50f7e310c340458a7ae47df
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b862e1a9d6ed89097b4519d1b3cde5a169841069d056cef214437ef6987452e1
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6fe95a3571e9df68816ac46ccf405f0fd55b7d69662da0acabe531bd8d6e1a7
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a28dddede4fc4bc0b2366a2adf3a2a7cc15aa053d48f5784818c30e8f581c8c
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13dfeb18522f460b3bcc4a8a207781907714f968fa9018d9e319a8e4cb1a2731
3
+ size 14439
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9027fde6d34c0d3df23bed16cbdb4322526cc2d0a788564dc5bdbc1a2a3cc939
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c93d998395352ee5e4e32f058b4acd76a3b9750cf1106a383769865ab27f4f83
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ba2bcf5cf6c5910865dc9ad1b993b583122f7246dff018513acde53be4e1cef
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf0bf23f928568c7a3122050cfa99355815fe8c7f93429bc6536f64596e3cd0c
3
+ size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c0ded2329ef53920cb585a63eb47585e2a859a8641b80120f9a203cf7e7c919
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ea81d3c492be115b8468d1cd1e3438b2c19fcafe21dc467d287c05e9211418f
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0b2023ad615bca290ded4b09be5cde4d541c3da29f7726a5a43e59cc6dddea5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ccdbb1080343e6446b837638f679019013e2908bdad70d4dd13ba9861cbc74c
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41b811cc0631c90302be59bb28064c0b766f48934ca956ae5177890d0b44ff21
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b675771b8e40ada562deddfe20382c77a27fda6482714c03a215febc190e3d96
3
+ size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ace9c134526b185b1490c715e5c10cccba9ced2c0bbb4bf14ec35ff87f84192
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6547f6037906559622b01a609d68d147aca5926d35d56c61c1cb91881c5b9c16
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:094bb976952135775a16c6a0f91bc9d97a98726f15f9c9198b320fb5dc6d70ce
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e726e73c63cfe3e121582d5f26c95cee1a06ebdb69c1f3b859e1406e64a4d59
3
+ size 14439
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a36392859753540b5ff28a6690e0fb35c1157de322529d1ae210898db91ddda7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60ddf5c33f60196860027b40e8443017b8ed479cc5dfb73f8a076fe07f546fd4
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.370201292931769,
5
- "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2206,11 +2206,211 @@
2206
  "eval_samples_per_second": 1850.736,
2207
  "eval_steps_per_second": 29.612,
2208
  "step": 110000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2209
  }
2210
  ],
2211
  "max_steps": 500000,
2212
  "num_train_epochs": 16,
2213
- "total_flos": 3.5143345626635724e+21,
2214
  "trial_name": null,
2215
  "trial_params": null
2216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.676583228652839,
5
+ "global_step": 120000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2206
  "eval_samples_per_second": 1850.736,
2207
  "eval_steps_per_second": 29.612,
2208
  "step": 110000
2209
+ },
2210
+ {
2211
+ "epoch": 3.39,
2212
+ "learning_rate": 0.0002774275491977922,
2213
+ "loss": 0.3703,
2214
+ "step": 110500
2215
+ },
2216
+ {
2217
+ "epoch": 3.4,
2218
+ "learning_rate": 0.0002771699479464853,
2219
+ "loss": 0.3701,
2220
+ "step": 111000
2221
+ },
2222
+ {
2223
+ "epoch": 3.4,
2224
+ "eval_loss": 0.7903389930725098,
2225
+ "eval_runtime": 0.5256,
2226
+ "eval_samples_per_second": 1902.556,
2227
+ "eval_steps_per_second": 30.441,
2228
+ "step": 111000
2229
+ },
2230
+ {
2231
+ "epoch": 3.42,
2232
+ "learning_rate": 0.00027691101066420104,
2233
+ "loss": 0.3694,
2234
+ "step": 111500
2235
+ },
2236
+ {
2237
+ "epoch": 3.43,
2238
+ "learning_rate": 0.0002766507401826361,
2239
+ "loss": 0.3692,
2240
+ "step": 112000
2241
+ },
2242
+ {
2243
+ "epoch": 3.43,
2244
+ "eval_loss": 0.7830519080162048,
2245
+ "eval_runtime": 0.5162,
2246
+ "eval_samples_per_second": 1937.313,
2247
+ "eval_steps_per_second": 30.997,
2248
+ "step": 112000
2249
+ },
2250
+ {
2251
+ "epoch": 3.45,
2252
+ "learning_rate": 0.0002763891393480666,
2253
+ "loss": 0.3694,
2254
+ "step": 112500
2255
+ },
2256
+ {
2257
+ "epoch": 3.46,
2258
+ "learning_rate": 0.0002761262110213175,
2259
+ "loss": 0.3686,
2260
+ "step": 113000
2261
+ },
2262
+ {
2263
+ "epoch": 3.46,
2264
+ "eval_loss": 0.7800700664520264,
2265
+ "eval_runtime": 0.5106,
2266
+ "eval_samples_per_second": 1958.342,
2267
+ "eval_steps_per_second": 31.333,
2268
+ "step": 113000
2269
+ },
2270
+ {
2271
+ "epoch": 3.48,
2272
+ "learning_rate": 0.00027586195807773083,
2273
+ "loss": 0.3689,
2274
+ "step": 113500
2275
+ },
2276
+ {
2277
+ "epoch": 3.49,
2278
+ "learning_rate": 0.00027559638340713435,
2279
+ "loss": 0.3685,
2280
+ "step": 114000
2281
+ },
2282
+ {
2283
+ "epoch": 3.49,
2284
+ "eval_loss": 0.7914212942123413,
2285
+ "eval_runtime": 0.5261,
2286
+ "eval_samples_per_second": 1900.85,
2287
+ "eval_steps_per_second": 30.414,
2288
+ "step": 114000
2289
+ },
2290
+ {
2291
+ "epoch": 3.51,
2292
+ "learning_rate": 0.00027532948991381025,
2293
+ "loss": 0.3686,
2294
+ "step": 114500
2295
+ },
2296
+ {
2297
+ "epoch": 3.52,
2298
+ "learning_rate": 0.00027506128051646287,
2299
+ "loss": 0.3681,
2300
+ "step": 115000
2301
+ },
2302
+ {
2303
+ "epoch": 3.52,
2304
+ "eval_loss": 0.7931650280952454,
2305
+ "eval_runtime": 0.5617,
2306
+ "eval_samples_per_second": 1780.334,
2307
+ "eval_steps_per_second": 28.485,
2308
+ "step": 115000
2309
+ },
2310
+ {
2311
+ "epoch": 3.54,
2312
+ "learning_rate": 0.00027479175814818733,
2313
+ "loss": 0.368,
2314
+ "step": 115500
2315
+ },
2316
+ {
2317
+ "epoch": 3.55,
2318
+ "learning_rate": 0.000274520925756437,
2319
+ "loss": 0.3679,
2320
+ "step": 116000
2321
+ },
2322
+ {
2323
+ "epoch": 3.55,
2324
+ "eval_loss": 0.7921015620231628,
2325
+ "eval_runtime": 0.5338,
2326
+ "eval_samples_per_second": 1873.268,
2327
+ "eval_steps_per_second": 29.972,
2328
+ "step": 116000
2329
+ },
2330
+ {
2331
+ "epoch": 3.57,
2332
+ "learning_rate": 0.00027424878630299157,
2333
+ "loss": 0.3673,
2334
+ "step": 116500
2335
+ },
2336
+ {
2337
+ "epoch": 3.58,
2338
+ "learning_rate": 0.0002739753427639244,
2339
+ "loss": 0.3673,
2340
+ "step": 117000
2341
+ },
2342
+ {
2343
+ "epoch": 3.58,
2344
+ "eval_loss": 0.7863946557044983,
2345
+ "eval_runtime": 0.528,
2346
+ "eval_samples_per_second": 1893.847,
2347
+ "eval_steps_per_second": 30.302,
2348
+ "step": 117000
2349
+ },
2350
+ {
2351
+ "epoch": 3.6,
2352
+ "learning_rate": 0.0002737005981295704,
2353
+ "loss": 0.3673,
2354
+ "step": 117500
2355
+ },
2356
+ {
2357
+ "epoch": 3.62,
2358
+ "learning_rate": 0.0002734245554044927,
2359
+ "loss": 0.3668,
2360
+ "step": 118000
2361
+ },
2362
+ {
2363
+ "epoch": 3.62,
2364
+ "eval_loss": 0.7858835458755493,
2365
+ "eval_runtime": 0.5221,
2366
+ "eval_samples_per_second": 1915.181,
2367
+ "eval_steps_per_second": 30.643,
2368
+ "step": 118000
2369
+ },
2370
+ {
2371
+ "epoch": 3.63,
2372
+ "learning_rate": 0.0002731472176074504,
2373
+ "loss": 0.3667,
2374
+ "step": 118500
2375
+ },
2376
+ {
2377
+ "epoch": 3.65,
2378
+ "learning_rate": 0.0002728685877713653,
2379
+ "loss": 0.3669,
2380
+ "step": 119000
2381
+ },
2382
+ {
2383
+ "epoch": 3.65,
2384
+ "eval_loss": 0.786138653755188,
2385
+ "eval_runtime": 0.5283,
2386
+ "eval_samples_per_second": 1893.025,
2387
+ "eval_steps_per_second": 30.288,
2388
+ "step": 119000
2389
+ },
2390
+ {
2391
+ "epoch": 3.66,
2392
+ "learning_rate": 0.0002725886689432884,
2393
+ "loss": 0.3664,
2394
+ "step": 119500
2395
+ },
2396
+ {
2397
+ "epoch": 3.68,
2398
+ "learning_rate": 0.0002723074641843674,
2399
+ "loss": 0.3664,
2400
+ "step": 120000
2401
+ },
2402
+ {
2403
+ "epoch": 3.68,
2404
+ "eval_loss": 0.7753082513809204,
2405
+ "eval_runtime": 0.539,
2406
+ "eval_samples_per_second": 1855.209,
2407
+ "eval_steps_per_second": 29.683,
2408
+ "step": 120000
2409
  }
2410
  ],
2411
  "max_steps": 500000,
2412
  "num_train_epochs": 16,
2413
+ "total_flos": 3.8338217014380724e+21,
2414
  "trial_name": null,
2415
  "trial_params": null
2416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b862e1a9d6ed89097b4519d1b3cde5a169841069d056cef214437ef6987452e1
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6fe95a3571e9df68816ac46ccf405f0fd55b7d69662da0acabe531bd8d6e1a7
3
  size 102501541