abdiharyadi commited on
Commit
faf95da
1 Parent(s): 4bf1f7f

Training in progress, step 10800, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3fad6a2bd8c0561496dc3383467f8817292411148c9873036bdb69e35b5543d
3
  size 1575259780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e207271ed9fc29b099863defab32d4952b4913bf12a40b3e58c16c30ace3354
3
  size 1575259780
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2cff55d576f8616b2e2cecc6803e8b473e51aac0379039756299682d060075c
3
  size 3150397656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fef1272e9932d85f4a0434d895c0520855facd99c96206dd3c6f6de6dc78ba1a
3
  size 3150397656
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d43647c33bad1e742cdbb1fe16026c6964f3f2731dbafc990c955d70accc926b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:725551c540c5935ca09ad257e1ec602a9cde53567df7545ad488987db5f81d2a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72d5d233f2b2864a95b1df30795943c1ca367974d3ff0da03626224e18769293
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7a14dd5679ca365b7036c52129c5a5a2c8f2459be0b4d29ecb21c4b219dee84
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 44.4125,
3
- "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-7200",
4
- "epoch": 79.47019867549669,
5
  "eval_steps": 3600,
6
- "global_step": 7200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2193,6 +2193,1096 @@
2193
  "eval_samples_per_second": 8.751,
2194
  "eval_steps_per_second": 1.75,
2195
  "step": 7200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2196
  }
2197
  ],
2198
  "logging_steps": 20,
@@ -2212,7 +3302,7 @@
2212
  "attributes": {}
2213
  }
2214
  },
2215
- "total_flos": 2.128877387926733e+16,
2216
  "train_batch_size": 5,
2217
  "trial_name": null,
2218
  "trial_params": null
 
1
  {
2
+ "best_metric": 49.121,
3
+ "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-10800",
4
+ "epoch": 119.20529801324503,
5
  "eval_steps": 3600,
6
+ "global_step": 10800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2193
  "eval_samples_per_second": 8.751,
2194
  "eval_steps_per_second": 1.75,
2195
  "step": 7200
2196
+ },
2197
+ {
2198
+ "epoch": 79.69094922737307,
2199
+ "learning_rate": 8.899303135888502e-07,
2200
+ "loss": 1.4294,
2201
+ "step": 7220
2202
+ },
2203
+ {
2204
+ "epoch": 79.91169977924945,
2205
+ "learning_rate": 8.896167247386759e-07,
2206
+ "loss": 1.4691,
2207
+ "step": 7240
2208
+ },
2209
+ {
2210
+ "epoch": 80.13245033112582,
2211
+ "learning_rate": 8.893031358885017e-07,
2212
+ "loss": 1.4619,
2213
+ "step": 7260
2214
+ },
2215
+ {
2216
+ "epoch": 80.3532008830022,
2217
+ "learning_rate": 8.889895470383275e-07,
2218
+ "loss": 1.4952,
2219
+ "step": 7280
2220
+ },
2221
+ {
2222
+ "epoch": 80.57395143487858,
2223
+ "learning_rate": 8.886759581881533e-07,
2224
+ "loss": 1.4284,
2225
+ "step": 7300
2226
+ },
2227
+ {
2228
+ "epoch": 80.79470198675497,
2229
+ "learning_rate": 8.88362369337979e-07,
2230
+ "loss": 1.3992,
2231
+ "step": 7320
2232
+ },
2233
+ {
2234
+ "epoch": 81.01545253863135,
2235
+ "learning_rate": 8.880487804878048e-07,
2236
+ "loss": 1.4512,
2237
+ "step": 7340
2238
+ },
2239
+ {
2240
+ "epoch": 81.23620309050773,
2241
+ "learning_rate": 8.877351916376307e-07,
2242
+ "loss": 1.4298,
2243
+ "step": 7360
2244
+ },
2245
+ {
2246
+ "epoch": 81.45695364238411,
2247
+ "learning_rate": 8.874216027874564e-07,
2248
+ "loss": 1.4442,
2249
+ "step": 7380
2250
+ },
2251
+ {
2252
+ "epoch": 81.67770419426049,
2253
+ "learning_rate": 8.871080139372822e-07,
2254
+ "loss": 1.4112,
2255
+ "step": 7400
2256
+ },
2257
+ {
2258
+ "epoch": 81.89845474613686,
2259
+ "learning_rate": 8.867944250871079e-07,
2260
+ "loss": 1.4365,
2261
+ "step": 7420
2262
+ },
2263
+ {
2264
+ "epoch": 82.11920529801324,
2265
+ "learning_rate": 8.864808362369338e-07,
2266
+ "loss": 1.5076,
2267
+ "step": 7440
2268
+ },
2269
+ {
2270
+ "epoch": 82.33995584988962,
2271
+ "learning_rate": 8.861672473867594e-07,
2272
+ "loss": 1.4004,
2273
+ "step": 7460
2274
+ },
2275
+ {
2276
+ "epoch": 82.560706401766,
2277
+ "learning_rate": 8.858536585365853e-07,
2278
+ "loss": 1.4641,
2279
+ "step": 7480
2280
+ },
2281
+ {
2282
+ "epoch": 82.78145695364239,
2283
+ "learning_rate": 8.855400696864112e-07,
2284
+ "loss": 1.4321,
2285
+ "step": 7500
2286
+ },
2287
+ {
2288
+ "epoch": 83.00220750551877,
2289
+ "learning_rate": 8.85226480836237e-07,
2290
+ "loss": 1.4592,
2291
+ "step": 7520
2292
+ },
2293
+ {
2294
+ "epoch": 83.22295805739515,
2295
+ "learning_rate": 8.849128919860627e-07,
2296
+ "loss": 1.4101,
2297
+ "step": 7540
2298
+ },
2299
+ {
2300
+ "epoch": 83.44370860927152,
2301
+ "learning_rate": 8.845993031358884e-07,
2302
+ "loss": 1.4455,
2303
+ "step": 7560
2304
+ },
2305
+ {
2306
+ "epoch": 83.6644591611479,
2307
+ "learning_rate": 8.842857142857143e-07,
2308
+ "loss": 1.4175,
2309
+ "step": 7580
2310
+ },
2311
+ {
2312
+ "epoch": 83.88520971302428,
2313
+ "learning_rate": 8.8397212543554e-07,
2314
+ "loss": 1.4389,
2315
+ "step": 7600
2316
+ },
2317
+ {
2318
+ "epoch": 84.10596026490066,
2319
+ "learning_rate": 8.836585365853658e-07,
2320
+ "loss": 1.4963,
2321
+ "step": 7620
2322
+ },
2323
+ {
2324
+ "epoch": 84.32671081677704,
2325
+ "learning_rate": 8.833449477351916e-07,
2326
+ "loss": 1.4683,
2327
+ "step": 7640
2328
+ },
2329
+ {
2330
+ "epoch": 84.54746136865342,
2331
+ "learning_rate": 8.830313588850174e-07,
2332
+ "loss": 1.4528,
2333
+ "step": 7660
2334
+ },
2335
+ {
2336
+ "epoch": 84.76821192052981,
2337
+ "learning_rate": 8.827177700348431e-07,
2338
+ "loss": 1.4128,
2339
+ "step": 7680
2340
+ },
2341
+ {
2342
+ "epoch": 84.98896247240619,
2343
+ "learning_rate": 8.824041811846689e-07,
2344
+ "loss": 1.4012,
2345
+ "step": 7700
2346
+ },
2347
+ {
2348
+ "epoch": 85.20971302428256,
2349
+ "learning_rate": 8.820905923344947e-07,
2350
+ "loss": 1.4448,
2351
+ "step": 7720
2352
+ },
2353
+ {
2354
+ "epoch": 85.43046357615894,
2355
+ "learning_rate": 8.817770034843205e-07,
2356
+ "loss": 1.3746,
2357
+ "step": 7740
2358
+ },
2359
+ {
2360
+ "epoch": 85.65121412803532,
2361
+ "learning_rate": 8.814634146341464e-07,
2362
+ "loss": 1.4086,
2363
+ "step": 7760
2364
+ },
2365
+ {
2366
+ "epoch": 85.8719646799117,
2367
+ "learning_rate": 8.811498257839721e-07,
2368
+ "loss": 1.4774,
2369
+ "step": 7780
2370
+ },
2371
+ {
2372
+ "epoch": 86.09271523178808,
2373
+ "learning_rate": 8.808362369337979e-07,
2374
+ "loss": 1.4534,
2375
+ "step": 7800
2376
+ },
2377
+ {
2378
+ "epoch": 86.31346578366445,
2379
+ "learning_rate": 8.805226480836237e-07,
2380
+ "loss": 1.3844,
2381
+ "step": 7820
2382
+ },
2383
+ {
2384
+ "epoch": 86.53421633554083,
2385
+ "learning_rate": 8.802090592334494e-07,
2386
+ "loss": 1.4751,
2387
+ "step": 7840
2388
+ },
2389
+ {
2390
+ "epoch": 86.75496688741723,
2391
+ "learning_rate": 8.798954703832752e-07,
2392
+ "loss": 1.4002,
2393
+ "step": 7860
2394
+ },
2395
+ {
2396
+ "epoch": 86.9757174392936,
2397
+ "learning_rate": 8.79581881533101e-07,
2398
+ "loss": 1.4659,
2399
+ "step": 7880
2400
+ },
2401
+ {
2402
+ "epoch": 87.19646799116998,
2403
+ "learning_rate": 8.792682926829268e-07,
2404
+ "loss": 1.4322,
2405
+ "step": 7900
2406
+ },
2407
+ {
2408
+ "epoch": 87.41721854304636,
2409
+ "learning_rate": 8.789547038327526e-07,
2410
+ "loss": 1.4278,
2411
+ "step": 7920
2412
+ },
2413
+ {
2414
+ "epoch": 87.63796909492274,
2415
+ "learning_rate": 8.786411149825783e-07,
2416
+ "loss": 1.453,
2417
+ "step": 7940
2418
+ },
2419
+ {
2420
+ "epoch": 87.85871964679912,
2421
+ "learning_rate": 8.783275261324042e-07,
2422
+ "loss": 1.4371,
2423
+ "step": 7960
2424
+ },
2425
+ {
2426
+ "epoch": 88.0794701986755,
2427
+ "learning_rate": 8.780139372822298e-07,
2428
+ "loss": 1.4775,
2429
+ "step": 7980
2430
+ },
2431
+ {
2432
+ "epoch": 88.30022075055187,
2433
+ "learning_rate": 8.777003484320557e-07,
2434
+ "loss": 1.4058,
2435
+ "step": 8000
2436
+ },
2437
+ {
2438
+ "epoch": 88.52097130242825,
2439
+ "learning_rate": 8.773867595818815e-07,
2440
+ "loss": 1.4119,
2441
+ "step": 8020
2442
+ },
2443
+ {
2444
+ "epoch": 88.74172185430463,
2445
+ "learning_rate": 8.770731707317073e-07,
2446
+ "loss": 1.4316,
2447
+ "step": 8040
2448
+ },
2449
+ {
2450
+ "epoch": 88.96247240618102,
2451
+ "learning_rate": 8.767595818815331e-07,
2452
+ "loss": 1.4198,
2453
+ "step": 8060
2454
+ },
2455
+ {
2456
+ "epoch": 89.1832229580574,
2457
+ "learning_rate": 8.764459930313588e-07,
2458
+ "loss": 1.4434,
2459
+ "step": 8080
2460
+ },
2461
+ {
2462
+ "epoch": 89.40397350993378,
2463
+ "learning_rate": 8.761324041811848e-07,
2464
+ "loss": 1.4309,
2465
+ "step": 8100
2466
+ },
2467
+ {
2468
+ "epoch": 89.62472406181016,
2469
+ "learning_rate": 8.758188153310104e-07,
2470
+ "loss": 1.3757,
2471
+ "step": 8120
2472
+ },
2473
+ {
2474
+ "epoch": 89.84547461368653,
2475
+ "learning_rate": 8.755052264808362e-07,
2476
+ "loss": 1.479,
2477
+ "step": 8140
2478
+ },
2479
+ {
2480
+ "epoch": 90.06622516556291,
2481
+ "learning_rate": 8.751916376306619e-07,
2482
+ "loss": 1.3926,
2483
+ "step": 8160
2484
+ },
2485
+ {
2486
+ "epoch": 90.28697571743929,
2487
+ "learning_rate": 8.748780487804878e-07,
2488
+ "loss": 1.4629,
2489
+ "step": 8180
2490
+ },
2491
+ {
2492
+ "epoch": 90.50772626931567,
2493
+ "learning_rate": 8.745644599303135e-07,
2494
+ "loss": 1.3793,
2495
+ "step": 8200
2496
+ },
2497
+ {
2498
+ "epoch": 90.72847682119205,
2499
+ "learning_rate": 8.742508710801393e-07,
2500
+ "loss": 1.4113,
2501
+ "step": 8220
2502
+ },
2503
+ {
2504
+ "epoch": 90.94922737306844,
2505
+ "learning_rate": 8.739372822299651e-07,
2506
+ "loss": 1.4187,
2507
+ "step": 8240
2508
+ },
2509
+ {
2510
+ "epoch": 91.16997792494482,
2511
+ "learning_rate": 8.73623693379791e-07,
2512
+ "loss": 1.4466,
2513
+ "step": 8260
2514
+ },
2515
+ {
2516
+ "epoch": 91.3907284768212,
2517
+ "learning_rate": 8.733101045296167e-07,
2518
+ "loss": 1.4219,
2519
+ "step": 8280
2520
+ },
2521
+ {
2522
+ "epoch": 91.61147902869757,
2523
+ "learning_rate": 8.729965156794424e-07,
2524
+ "loss": 1.4783,
2525
+ "step": 8300
2526
+ },
2527
+ {
2528
+ "epoch": 91.83222958057395,
2529
+ "learning_rate": 8.726829268292683e-07,
2530
+ "loss": 1.4111,
2531
+ "step": 8320
2532
+ },
2533
+ {
2534
+ "epoch": 92.05298013245033,
2535
+ "learning_rate": 8.723693379790941e-07,
2536
+ "loss": 1.427,
2537
+ "step": 8340
2538
+ },
2539
+ {
2540
+ "epoch": 92.27373068432671,
2541
+ "learning_rate": 8.720557491289198e-07,
2542
+ "loss": 1.414,
2543
+ "step": 8360
2544
+ },
2545
+ {
2546
+ "epoch": 92.49448123620309,
2547
+ "learning_rate": 8.717421602787456e-07,
2548
+ "loss": 1.4097,
2549
+ "step": 8380
2550
+ },
2551
+ {
2552
+ "epoch": 92.71523178807946,
2553
+ "learning_rate": 8.714285714285715e-07,
2554
+ "loss": 1.4818,
2555
+ "step": 8400
2556
+ },
2557
+ {
2558
+ "epoch": 92.93598233995586,
2559
+ "learning_rate": 8.711149825783971e-07,
2560
+ "loss": 1.4127,
2561
+ "step": 8420
2562
+ },
2563
+ {
2564
+ "epoch": 93.15673289183223,
2565
+ "learning_rate": 8.708013937282229e-07,
2566
+ "loss": 1.4264,
2567
+ "step": 8440
2568
+ },
2569
+ {
2570
+ "epoch": 93.37748344370861,
2571
+ "learning_rate": 8.704878048780487e-07,
2572
+ "loss": 1.4663,
2573
+ "step": 8460
2574
+ },
2575
+ {
2576
+ "epoch": 93.59823399558499,
2577
+ "learning_rate": 8.701742160278746e-07,
2578
+ "loss": 1.4151,
2579
+ "step": 8480
2580
+ },
2581
+ {
2582
+ "epoch": 93.81898454746137,
2583
+ "learning_rate": 8.698606271777002e-07,
2584
+ "loss": 1.4362,
2585
+ "step": 8500
2586
+ },
2587
+ {
2588
+ "epoch": 94.03973509933775,
2589
+ "learning_rate": 8.695470383275261e-07,
2590
+ "loss": 1.3755,
2591
+ "step": 8520
2592
+ },
2593
+ {
2594
+ "epoch": 94.26048565121413,
2595
+ "learning_rate": 8.69233449477352e-07,
2596
+ "loss": 1.3927,
2597
+ "step": 8540
2598
+ },
2599
+ {
2600
+ "epoch": 94.4812362030905,
2601
+ "learning_rate": 8.689198606271777e-07,
2602
+ "loss": 1.4137,
2603
+ "step": 8560
2604
+ },
2605
+ {
2606
+ "epoch": 94.70198675496688,
2607
+ "learning_rate": 8.686062717770035e-07,
2608
+ "loss": 1.4284,
2609
+ "step": 8580
2610
+ },
2611
+ {
2612
+ "epoch": 94.92273730684327,
2613
+ "learning_rate": 8.682926829268292e-07,
2614
+ "loss": 1.4287,
2615
+ "step": 8600
2616
+ },
2617
+ {
2618
+ "epoch": 95.14348785871965,
2619
+ "learning_rate": 8.679790940766551e-07,
2620
+ "loss": 1.4243,
2621
+ "step": 8620
2622
+ },
2623
+ {
2624
+ "epoch": 95.36423841059603,
2625
+ "learning_rate": 8.676655052264807e-07,
2626
+ "loss": 1.3588,
2627
+ "step": 8640
2628
+ },
2629
+ {
2630
+ "epoch": 95.58498896247241,
2631
+ "learning_rate": 8.673519163763066e-07,
2632
+ "loss": 1.3846,
2633
+ "step": 8660
2634
+ },
2635
+ {
2636
+ "epoch": 95.80573951434879,
2637
+ "learning_rate": 8.670383275261325e-07,
2638
+ "loss": 1.4513,
2639
+ "step": 8680
2640
+ },
2641
+ {
2642
+ "epoch": 96.02649006622516,
2643
+ "learning_rate": 8.667247386759582e-07,
2644
+ "loss": 1.4022,
2645
+ "step": 8700
2646
+ },
2647
+ {
2648
+ "epoch": 96.24724061810154,
2649
+ "learning_rate": 8.664111498257838e-07,
2650
+ "loss": 1.3969,
2651
+ "step": 8720
2652
+ },
2653
+ {
2654
+ "epoch": 96.46799116997792,
2655
+ "learning_rate": 8.660975609756097e-07,
2656
+ "loss": 1.38,
2657
+ "step": 8740
2658
+ },
2659
+ {
2660
+ "epoch": 96.6887417218543,
2661
+ "learning_rate": 8.657839721254355e-07,
2662
+ "loss": 1.4701,
2663
+ "step": 8760
2664
+ },
2665
+ {
2666
+ "epoch": 96.90949227373068,
2667
+ "learning_rate": 8.654703832752613e-07,
2668
+ "loss": 1.3637,
2669
+ "step": 8780
2670
+ },
2671
+ {
2672
+ "epoch": 97.13024282560707,
2673
+ "learning_rate": 8.65156794425087e-07,
2674
+ "loss": 1.3748,
2675
+ "step": 8800
2676
+ },
2677
+ {
2678
+ "epoch": 97.35099337748345,
2679
+ "learning_rate": 8.648432055749129e-07,
2680
+ "loss": 1.461,
2681
+ "step": 8820
2682
+ },
2683
+ {
2684
+ "epoch": 97.57174392935983,
2685
+ "learning_rate": 8.645296167247387e-07,
2686
+ "loss": 1.4615,
2687
+ "step": 8840
2688
+ },
2689
+ {
2690
+ "epoch": 97.7924944812362,
2691
+ "learning_rate": 8.642160278745644e-07,
2692
+ "loss": 1.3775,
2693
+ "step": 8860
2694
+ },
2695
+ {
2696
+ "epoch": 98.01324503311258,
2697
+ "learning_rate": 8.639024390243902e-07,
2698
+ "loss": 1.4125,
2699
+ "step": 8880
2700
+ },
2701
+ {
2702
+ "epoch": 98.23399558498896,
2703
+ "learning_rate": 8.63588850174216e-07,
2704
+ "loss": 1.3846,
2705
+ "step": 8900
2706
+ },
2707
+ {
2708
+ "epoch": 98.45474613686534,
2709
+ "learning_rate": 8.632752613240419e-07,
2710
+ "loss": 1.3948,
2711
+ "step": 8920
2712
+ },
2713
+ {
2714
+ "epoch": 98.67549668874172,
2715
+ "learning_rate": 8.629616724738675e-07,
2716
+ "loss": 1.348,
2717
+ "step": 8940
2718
+ },
2719
+ {
2720
+ "epoch": 98.8962472406181,
2721
+ "learning_rate": 8.626480836236934e-07,
2722
+ "loss": 1.4504,
2723
+ "step": 8960
2724
+ },
2725
+ {
2726
+ "epoch": 99.11699779249449,
2727
+ "learning_rate": 8.623344947735191e-07,
2728
+ "loss": 1.3899,
2729
+ "step": 8980
2730
+ },
2731
+ {
2732
+ "epoch": 99.33774834437087,
2733
+ "learning_rate": 8.62020905923345e-07,
2734
+ "loss": 1.4558,
2735
+ "step": 9000
2736
+ },
2737
+ {
2738
+ "epoch": 99.55849889624724,
2739
+ "learning_rate": 8.617073170731706e-07,
2740
+ "loss": 1.3781,
2741
+ "step": 9020
2742
+ },
2743
+ {
2744
+ "epoch": 99.77924944812362,
2745
+ "learning_rate": 8.613937282229965e-07,
2746
+ "loss": 1.3847,
2747
+ "step": 9040
2748
+ },
2749
+ {
2750
+ "epoch": 100.0,
2751
+ "learning_rate": 8.610801393728222e-07,
2752
+ "loss": 1.4043,
2753
+ "step": 9060
2754
+ },
2755
+ {
2756
+ "epoch": 100.22075055187638,
2757
+ "learning_rate": 8.60766550522648e-07,
2758
+ "loss": 1.4069,
2759
+ "step": 9080
2760
+ },
2761
+ {
2762
+ "epoch": 100.44150110375276,
2763
+ "learning_rate": 8.604529616724739e-07,
2764
+ "loss": 1.3562,
2765
+ "step": 9100
2766
+ },
2767
+ {
2768
+ "epoch": 100.66225165562913,
2769
+ "learning_rate": 8.601393728222996e-07,
2770
+ "loss": 1.3854,
2771
+ "step": 9120
2772
+ },
2773
+ {
2774
+ "epoch": 100.88300220750551,
2775
+ "learning_rate": 8.598257839721255e-07,
2776
+ "loss": 1.3842,
2777
+ "step": 9140
2778
+ },
2779
+ {
2780
+ "epoch": 101.1037527593819,
2781
+ "learning_rate": 8.595121951219512e-07,
2782
+ "loss": 1.3954,
2783
+ "step": 9160
2784
+ },
2785
+ {
2786
+ "epoch": 101.32450331125828,
2787
+ "learning_rate": 8.59198606271777e-07,
2788
+ "loss": 1.3966,
2789
+ "step": 9180
2790
+ },
2791
+ {
2792
+ "epoch": 101.54525386313466,
2793
+ "learning_rate": 8.588850174216027e-07,
2794
+ "loss": 1.4175,
2795
+ "step": 9200
2796
+ },
2797
+ {
2798
+ "epoch": 101.76600441501104,
2799
+ "learning_rate": 8.585714285714286e-07,
2800
+ "loss": 1.3944,
2801
+ "step": 9220
2802
+ },
2803
+ {
2804
+ "epoch": 101.98675496688742,
2805
+ "learning_rate": 8.582578397212543e-07,
2806
+ "loss": 1.343,
2807
+ "step": 9240
2808
+ },
2809
+ {
2810
+ "epoch": 102.2075055187638,
2811
+ "learning_rate": 8.579442508710801e-07,
2812
+ "loss": 1.3974,
2813
+ "step": 9260
2814
+ },
2815
+ {
2816
+ "epoch": 102.42825607064017,
2817
+ "learning_rate": 8.576306620209059e-07,
2818
+ "loss": 1.4235,
2819
+ "step": 9280
2820
+ },
2821
+ {
2822
+ "epoch": 102.64900662251655,
2823
+ "learning_rate": 8.573170731707317e-07,
2824
+ "loss": 1.379,
2825
+ "step": 9300
2826
+ },
2827
+ {
2828
+ "epoch": 102.86975717439293,
2829
+ "learning_rate": 8.570034843205574e-07,
2830
+ "loss": 1.3593,
2831
+ "step": 9320
2832
+ },
2833
+ {
2834
+ "epoch": 103.09050772626932,
2835
+ "learning_rate": 8.566898954703832e-07,
2836
+ "loss": 1.4164,
2837
+ "step": 9340
2838
+ },
2839
+ {
2840
+ "epoch": 103.3112582781457,
2841
+ "learning_rate": 8.563763066202091e-07,
2842
+ "loss": 1.3811,
2843
+ "step": 9360
2844
+ },
2845
+ {
2846
+ "epoch": 103.53200883002208,
2847
+ "learning_rate": 8.560627177700348e-07,
2848
+ "loss": 1.3853,
2849
+ "step": 9380
2850
+ },
2851
+ {
2852
+ "epoch": 103.75275938189846,
2853
+ "learning_rate": 8.557491289198606e-07,
2854
+ "loss": 1.3948,
2855
+ "step": 9400
2856
+ },
2857
+ {
2858
+ "epoch": 103.97350993377484,
2859
+ "learning_rate": 8.554355400696864e-07,
2860
+ "loss": 1.4206,
2861
+ "step": 9420
2862
+ },
2863
+ {
2864
+ "epoch": 104.19426048565121,
2865
+ "learning_rate": 8.551219512195122e-07,
2866
+ "loss": 1.3967,
2867
+ "step": 9440
2868
+ },
2869
+ {
2870
+ "epoch": 104.41501103752759,
2871
+ "learning_rate": 8.548083623693379e-07,
2872
+ "loss": 1.3597,
2873
+ "step": 9460
2874
+ },
2875
+ {
2876
+ "epoch": 104.63576158940397,
2877
+ "learning_rate": 8.544947735191637e-07,
2878
+ "loss": 1.4107,
2879
+ "step": 9480
2880
+ },
2881
+ {
2882
+ "epoch": 104.85651214128035,
2883
+ "learning_rate": 8.541811846689896e-07,
2884
+ "loss": 1.3626,
2885
+ "step": 9500
2886
+ },
2887
+ {
2888
+ "epoch": 105.07726269315673,
2889
+ "learning_rate": 8.538675958188153e-07,
2890
+ "loss": 1.4051,
2891
+ "step": 9520
2892
+ },
2893
+ {
2894
+ "epoch": 105.29801324503312,
2895
+ "learning_rate": 8.53554006968641e-07,
2896
+ "loss": 1.4063,
2897
+ "step": 9540
2898
+ },
2899
+ {
2900
+ "epoch": 105.5187637969095,
2901
+ "learning_rate": 8.532404181184669e-07,
2902
+ "loss": 1.4275,
2903
+ "step": 9560
2904
+ },
2905
+ {
2906
+ "epoch": 105.73951434878587,
2907
+ "learning_rate": 8.529268292682926e-07,
2908
+ "loss": 1.3462,
2909
+ "step": 9580
2910
+ },
2911
+ {
2912
+ "epoch": 105.96026490066225,
2913
+ "learning_rate": 8.526132404181184e-07,
2914
+ "loss": 1.3702,
2915
+ "step": 9600
2916
+ },
2917
+ {
2918
+ "epoch": 106.18101545253863,
2919
+ "learning_rate": 8.522996515679441e-07,
2920
+ "loss": 1.3683,
2921
+ "step": 9620
2922
+ },
2923
+ {
2924
+ "epoch": 106.40176600441501,
2925
+ "learning_rate": 8.5198606271777e-07,
2926
+ "loss": 1.4073,
2927
+ "step": 9640
2928
+ },
2929
+ {
2930
+ "epoch": 106.62251655629139,
2931
+ "learning_rate": 8.516724738675959e-07,
2932
+ "loss": 1.3627,
2933
+ "step": 9660
2934
+ },
2935
+ {
2936
+ "epoch": 106.84326710816777,
2937
+ "learning_rate": 8.513588850174215e-07,
2938
+ "loss": 1.4,
2939
+ "step": 9680
2940
+ },
2941
+ {
2942
+ "epoch": 107.06401766004414,
2943
+ "learning_rate": 8.510452961672474e-07,
2944
+ "loss": 1.3322,
2945
+ "step": 9700
2946
+ },
2947
+ {
2948
+ "epoch": 107.28476821192054,
2949
+ "learning_rate": 8.507317073170731e-07,
2950
+ "loss": 1.3776,
2951
+ "step": 9720
2952
+ },
2953
+ {
2954
+ "epoch": 107.50551876379691,
2955
+ "learning_rate": 8.50418118466899e-07,
2956
+ "loss": 1.3837,
2957
+ "step": 9740
2958
+ },
2959
+ {
2960
+ "epoch": 107.72626931567329,
2961
+ "learning_rate": 8.501045296167246e-07,
2962
+ "loss": 1.3633,
2963
+ "step": 9760
2964
+ },
2965
+ {
2966
+ "epoch": 107.94701986754967,
2967
+ "learning_rate": 8.497909407665505e-07,
2968
+ "loss": 1.4582,
2969
+ "step": 9780
2970
+ },
2971
+ {
2972
+ "epoch": 108.16777041942605,
2973
+ "learning_rate": 8.494773519163763e-07,
2974
+ "loss": 1.4204,
2975
+ "step": 9800
2976
+ },
2977
+ {
2978
+ "epoch": 108.38852097130243,
2979
+ "learning_rate": 8.49163763066202e-07,
2980
+ "loss": 1.3606,
2981
+ "step": 9820
2982
+ },
2983
+ {
2984
+ "epoch": 108.6092715231788,
2985
+ "learning_rate": 8.488501742160278e-07,
2986
+ "loss": 1.41,
2987
+ "step": 9840
2988
+ },
2989
+ {
2990
+ "epoch": 108.83002207505518,
2991
+ "learning_rate": 8.485365853658536e-07,
2992
+ "loss": 1.321,
2993
+ "step": 9860
2994
+ },
2995
+ {
2996
+ "epoch": 109.05077262693156,
2997
+ "learning_rate": 8.482229965156795e-07,
2998
+ "loss": 1.3471,
2999
+ "step": 9880
3000
+ },
3001
+ {
3002
+ "epoch": 109.27152317880795,
3003
+ "learning_rate": 8.479094076655051e-07,
3004
+ "loss": 1.3809,
3005
+ "step": 9900
3006
+ },
3007
+ {
3008
+ "epoch": 109.49227373068433,
3009
+ "learning_rate": 8.47595818815331e-07,
3010
+ "loss": 1.3795,
3011
+ "step": 9920
3012
+ },
3013
+ {
3014
+ "epoch": 109.71302428256071,
3015
+ "learning_rate": 8.472822299651568e-07,
3016
+ "loss": 1.3751,
3017
+ "step": 9940
3018
+ },
3019
+ {
3020
+ "epoch": 109.93377483443709,
3021
+ "learning_rate": 8.469686411149826e-07,
3022
+ "loss": 1.3513,
3023
+ "step": 9960
3024
+ },
3025
+ {
3026
+ "epoch": 110.15452538631347,
3027
+ "learning_rate": 8.466550522648083e-07,
3028
+ "loss": 1.3526,
3029
+ "step": 9980
3030
+ },
3031
+ {
3032
+ "epoch": 110.37527593818984,
3033
+ "learning_rate": 8.463414634146341e-07,
3034
+ "loss": 1.3819,
3035
+ "step": 10000
3036
+ },
3037
+ {
3038
+ "epoch": 110.59602649006622,
3039
+ "learning_rate": 8.460278745644599e-07,
3040
+ "loss": 1.3961,
3041
+ "step": 10020
3042
+ },
3043
+ {
3044
+ "epoch": 110.8167770419426,
3045
+ "learning_rate": 8.457142857142856e-07,
3046
+ "loss": 1.3934,
3047
+ "step": 10040
3048
+ },
3049
+ {
3050
+ "epoch": 111.03752759381898,
3051
+ "learning_rate": 8.454006968641114e-07,
3052
+ "loss": 1.3603,
3053
+ "step": 10060
3054
+ },
3055
+ {
3056
+ "epoch": 111.25827814569537,
3057
+ "learning_rate": 8.450871080139372e-07,
3058
+ "loss": 1.3933,
3059
+ "step": 10080
3060
+ },
3061
+ {
3062
+ "epoch": 111.47902869757175,
3063
+ "learning_rate": 8.44773519163763e-07,
3064
+ "loss": 1.381,
3065
+ "step": 10100
3066
+ },
3067
+ {
3068
+ "epoch": 111.69977924944813,
3069
+ "learning_rate": 8.444599303135888e-07,
3070
+ "loss": 1.4153,
3071
+ "step": 10120
3072
+ },
3073
+ {
3074
+ "epoch": 111.9205298013245,
3075
+ "learning_rate": 8.441463414634147e-07,
3076
+ "loss": 1.361,
3077
+ "step": 10140
3078
+ },
3079
+ {
3080
+ "epoch": 112.14128035320088,
3081
+ "learning_rate": 8.438327526132404e-07,
3082
+ "loss": 1.3341,
3083
+ "step": 10160
3084
+ },
3085
+ {
3086
+ "epoch": 112.36203090507726,
3087
+ "learning_rate": 8.435191637630662e-07,
3088
+ "loss": 1.3696,
3089
+ "step": 10180
3090
+ },
3091
+ {
3092
+ "epoch": 112.58278145695364,
3093
+ "learning_rate": 8.432055749128919e-07,
3094
+ "loss": 1.3899,
3095
+ "step": 10200
3096
+ },
3097
+ {
3098
+ "epoch": 112.80353200883002,
3099
+ "learning_rate": 8.428919860627178e-07,
3100
+ "loss": 1.3883,
3101
+ "step": 10220
3102
+ },
3103
+ {
3104
+ "epoch": 113.0242825607064,
3105
+ "learning_rate": 8.425783972125435e-07,
3106
+ "loss": 1.3401,
3107
+ "step": 10240
3108
+ },
3109
+ {
3110
+ "epoch": 113.24503311258277,
3111
+ "learning_rate": 8.422648083623693e-07,
3112
+ "loss": 1.3599,
3113
+ "step": 10260
3114
+ },
3115
+ {
3116
+ "epoch": 113.46578366445917,
3117
+ "learning_rate": 8.419512195121951e-07,
3118
+ "loss": 1.3688,
3119
+ "step": 10280
3120
+ },
3121
+ {
3122
+ "epoch": 113.68653421633555,
3123
+ "learning_rate": 8.416376306620209e-07,
3124
+ "loss": 1.3976,
3125
+ "step": 10300
3126
+ },
3127
+ {
3128
+ "epoch": 113.90728476821192,
3129
+ "learning_rate": 8.413240418118465e-07,
3130
+ "loss": 1.3909,
3131
+ "step": 10320
3132
+ },
3133
+ {
3134
+ "epoch": 114.1280353200883,
3135
+ "learning_rate": 8.410104529616724e-07,
3136
+ "loss": 1.4132,
3137
+ "step": 10340
3138
+ },
3139
+ {
3140
+ "epoch": 114.34878587196468,
3141
+ "learning_rate": 8.406968641114982e-07,
3142
+ "loss": 1.3741,
3143
+ "step": 10360
3144
+ },
3145
+ {
3146
+ "epoch": 114.56953642384106,
3147
+ "learning_rate": 8.40383275261324e-07,
3148
+ "loss": 1.3339,
3149
+ "step": 10380
3150
+ },
3151
+ {
3152
+ "epoch": 114.79028697571744,
3153
+ "learning_rate": 8.400696864111498e-07,
3154
+ "loss": 1.3869,
3155
+ "step": 10400
3156
+ },
3157
+ {
3158
+ "epoch": 115.01103752759381,
3159
+ "learning_rate": 8.397560975609756e-07,
3160
+ "loss": 1.3644,
3161
+ "step": 10420
3162
+ },
3163
+ {
3164
+ "epoch": 115.23178807947019,
3165
+ "learning_rate": 8.394425087108014e-07,
3166
+ "loss": 1.3584,
3167
+ "step": 10440
3168
+ },
3169
+ {
3170
+ "epoch": 115.45253863134658,
3171
+ "learning_rate": 8.391289198606271e-07,
3172
+ "loss": 1.3698,
3173
+ "step": 10460
3174
+ },
3175
+ {
3176
+ "epoch": 115.67328918322296,
3177
+ "learning_rate": 8.388153310104529e-07,
3178
+ "loss": 1.39,
3179
+ "step": 10480
3180
+ },
3181
+ {
3182
+ "epoch": 115.89403973509934,
3183
+ "learning_rate": 8.385017421602787e-07,
3184
+ "loss": 1.3651,
3185
+ "step": 10500
3186
+ },
3187
+ {
3188
+ "epoch": 116.11479028697572,
3189
+ "learning_rate": 8.381881533101045e-07,
3190
+ "loss": 1.339,
3191
+ "step": 10520
3192
+ },
3193
+ {
3194
+ "epoch": 116.3355408388521,
3195
+ "learning_rate": 8.378745644599303e-07,
3196
+ "loss": 1.3469,
3197
+ "step": 10540
3198
+ },
3199
+ {
3200
+ "epoch": 116.55629139072848,
3201
+ "learning_rate": 8.375609756097561e-07,
3202
+ "loss": 1.4008,
3203
+ "step": 10560
3204
+ },
3205
+ {
3206
+ "epoch": 116.77704194260485,
3207
+ "learning_rate": 8.372473867595818e-07,
3208
+ "loss": 1.3757,
3209
+ "step": 10580
3210
+ },
3211
+ {
3212
+ "epoch": 116.99779249448123,
3213
+ "learning_rate": 8.369337979094076e-07,
3214
+ "loss": 1.3792,
3215
+ "step": 10600
3216
+ },
3217
+ {
3218
+ "epoch": 117.21854304635761,
3219
+ "learning_rate": 8.366202090592334e-07,
3220
+ "loss": 1.3422,
3221
+ "step": 10620
3222
+ },
3223
+ {
3224
+ "epoch": 117.439293598234,
3225
+ "learning_rate": 8.363066202090592e-07,
3226
+ "loss": 1.3876,
3227
+ "step": 10640
3228
+ },
3229
+ {
3230
+ "epoch": 117.66004415011038,
3231
+ "learning_rate": 8.359930313588849e-07,
3232
+ "loss": 1.3519,
3233
+ "step": 10660
3234
+ },
3235
+ {
3236
+ "epoch": 117.88079470198676,
3237
+ "learning_rate": 8.356794425087108e-07,
3238
+ "loss": 1.3268,
3239
+ "step": 10680
3240
+ },
3241
+ {
3242
+ "epoch": 118.10154525386314,
3243
+ "learning_rate": 8.353658536585366e-07,
3244
+ "loss": 1.4245,
3245
+ "step": 10700
3246
+ },
3247
+ {
3248
+ "epoch": 118.32229580573951,
3249
+ "learning_rate": 8.350522648083623e-07,
3250
+ "loss": 1.3755,
3251
+ "step": 10720
3252
+ },
3253
+ {
3254
+ "epoch": 118.54304635761589,
3255
+ "learning_rate": 8.347386759581881e-07,
3256
+ "loss": 1.3318,
3257
+ "step": 10740
3258
+ },
3259
+ {
3260
+ "epoch": 118.76379690949227,
3261
+ "learning_rate": 8.344250871080139e-07,
3262
+ "loss": 1.3395,
3263
+ "step": 10760
3264
+ },
3265
+ {
3266
+ "epoch": 118.98454746136865,
3267
+ "learning_rate": 8.341114982578397e-07,
3268
+ "loss": 1.407,
3269
+ "step": 10780
3270
+ },
3271
+ {
3272
+ "epoch": 119.20529801324503,
3273
+ "learning_rate": 8.337979094076654e-07,
3274
+ "loss": 1.3621,
3275
+ "step": 10800
3276
+ },
3277
+ {
3278
+ "epoch": 119.20529801324503,
3279
+ "eval_bleu": 49.121,
3280
+ "eval_gen_len": 8.8,
3281
+ "eval_loss": 2.0037317276000977,
3282
+ "eval_runtime": 3.2416,
3283
+ "eval_samples_per_second": 9.255,
3284
+ "eval_steps_per_second": 1.851,
3285
+ "step": 10800
3286
  }
3287
  ],
3288
  "logging_steps": 20,
 
3302
  "attributes": {}
3303
  }
3304
  },
3305
+ "total_flos": 3.194864182891315e+16,
3306
  "train_batch_size": 5,
3307
  "trial_name": null,
3308
  "trial_params": null