dq158 committed on
Commit
45891c1
1 Parent(s): 8ffadf4

Training in progress, epoch 5, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16aab112374e0637635192e631493e5cc9fe41a7e4e6e216c0bb99e95ae685a5
3
  size 37789864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b37145078d88e9a343bd22617f50ead8d019b0aed1436bbb79f1fd49c66b6e
3
  size 37789864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddbb2f973e41d59a5b9f8697998020f6be69cf0c9c3ecccf1446dc42581a6fef
3
  size 2622266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bcc4bfa26449a669fcf4e2bd2006218fa37d9141b3c0e6ed1720dba59a9fd65
3
  size 2622266
last-checkpoint/rng_state.pth CHANGED
Binary files a/last-checkpoint/rng_state.pth and b/last-checkpoint/rng_state.pth differ
 
last-checkpoint/scheduler.pt CHANGED
Binary files a/last-checkpoint/scheduler.pt and b/last-checkpoint/scheduler.pt differ
 
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 3.0066018104553223,
3
- "best_model_checkpoint": "dq158/coqui/checkpoint-161392",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 161392,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2015,13 +2015,518 @@
2015
  "eval_steps_per_second": 0.544,
2016
  "eval_translation_length": 4591104,
2017
  "step": 161392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2018
  }
2019
  ],
2020
  "logging_steps": 500,
2021
  "max_steps": 1210440,
2022
  "num_train_epochs": 30,
2023
  "save_steps": 1000,
2024
- "total_flos": 2.769874243981148e+18,
2025
  "trial_name": null,
2026
  "trial_params": null
2027
  }
 
1
  {
2
+ "best_metric": 2.995251417160034,
3
+ "best_model_checkpoint": "dq158/coqui/checkpoint-201740",
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 201740,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2015
  "eval_steps_per_second": 0.544,
2016
  "eval_translation_length": 4591104,
2017
  "step": 161392
2018
+ },
2019
+ {
2020
+ "epoch": 4.0,
2021
+ "learning_rate": 7.655554846753297e-05,
2022
+ "loss": 3.1442,
2023
+ "step": 161500
2024
+ },
2025
+ {
2026
+ "epoch": 4.02,
2027
+ "learning_rate": 7.653443602624745e-05,
2028
+ "loss": 3.079,
2029
+ "step": 162000
2030
+ },
2031
+ {
2032
+ "epoch": 4.03,
2033
+ "learning_rate": 7.651326200854252e-05,
2034
+ "loss": 3.1155,
2035
+ "step": 162500
2036
+ },
2037
+ {
2038
+ "epoch": 4.04,
2039
+ "learning_rate": 7.64920264501056e-05,
2040
+ "loss": 3.034,
2041
+ "step": 163000
2042
+ },
2043
+ {
2044
+ "epoch": 4.05,
2045
+ "learning_rate": 7.647072938672785e-05,
2046
+ "loss": 3.1226,
2047
+ "step": 163500
2048
+ },
2049
+ {
2050
+ "epoch": 4.06,
2051
+ "learning_rate": 7.644937085430409e-05,
2052
+ "loss": 3.1027,
2053
+ "step": 164000
2054
+ },
2055
+ {
2056
+ "epoch": 4.08,
2057
+ "learning_rate": 7.642795088883274e-05,
2058
+ "loss": 3.1736,
2059
+ "step": 164500
2060
+ },
2061
+ {
2062
+ "epoch": 4.09,
2063
+ "learning_rate": 7.640646952641577e-05,
2064
+ "loss": 2.9279,
2065
+ "step": 165000
2066
+ },
2067
+ {
2068
+ "epoch": 4.1,
2069
+ "learning_rate": 7.638492680325862e-05,
2070
+ "loss": 3.0596,
2071
+ "step": 165500
2072
+ },
2073
+ {
2074
+ "epoch": 4.11,
2075
+ "learning_rate": 7.636332275567012e-05,
2076
+ "loss": 3.0027,
2077
+ "step": 166000
2078
+ },
2079
+ {
2080
+ "epoch": 4.13,
2081
+ "learning_rate": 7.634165742006251e-05,
2082
+ "loss": 3.1487,
2083
+ "step": 166500
2084
+ },
2085
+ {
2086
+ "epoch": 4.14,
2087
+ "learning_rate": 7.631993083295134e-05,
2088
+ "loss": 3.04,
2089
+ "step": 167000
2090
+ },
2091
+ {
2092
+ "epoch": 4.15,
2093
+ "learning_rate": 7.62981430309553e-05,
2094
+ "loss": 3.0355,
2095
+ "step": 167500
2096
+ },
2097
+ {
2098
+ "epoch": 4.16,
2099
+ "learning_rate": 7.627629405079637e-05,
2100
+ "loss": 3.1222,
2101
+ "step": 168000
2102
+ },
2103
+ {
2104
+ "epoch": 4.18,
2105
+ "learning_rate": 7.625438392929956e-05,
2106
+ "loss": 3.0185,
2107
+ "step": 168500
2108
+ },
2109
+ {
2110
+ "epoch": 4.19,
2111
+ "learning_rate": 7.623241270339294e-05,
2112
+ "loss": 3.036,
2113
+ "step": 169000
2114
+ },
2115
+ {
2116
+ "epoch": 4.2,
2117
+ "learning_rate": 7.621038041010763e-05,
2118
+ "loss": 3.0182,
2119
+ "step": 169500
2120
+ },
2121
+ {
2122
+ "epoch": 4.21,
2123
+ "learning_rate": 7.61882870865776e-05,
2124
+ "loss": 3.0501,
2125
+ "step": 170000
2126
+ },
2127
+ {
2128
+ "epoch": 4.23,
2129
+ "learning_rate": 7.61661327700397e-05,
2130
+ "loss": 3.0935,
2131
+ "step": 170500
2132
+ },
2133
+ {
2134
+ "epoch": 4.24,
2135
+ "learning_rate": 7.614391749783361e-05,
2136
+ "loss": 2.9745,
2137
+ "step": 171000
2138
+ },
2139
+ {
2140
+ "epoch": 4.25,
2141
+ "learning_rate": 7.612164130740175e-05,
2142
+ "loss": 3.0497,
2143
+ "step": 171500
2144
+ },
2145
+ {
2146
+ "epoch": 4.26,
2147
+ "learning_rate": 7.609930423628915e-05,
2148
+ "loss": 3.0207,
2149
+ "step": 172000
2150
+ },
2151
+ {
2152
+ "epoch": 4.28,
2153
+ "learning_rate": 7.607690632214351e-05,
2154
+ "loss": 3.0059,
2155
+ "step": 172500
2156
+ },
2157
+ {
2158
+ "epoch": 4.29,
2159
+ "learning_rate": 7.605444760271507e-05,
2160
+ "loss": 3.113,
2161
+ "step": 173000
2162
+ },
2163
+ {
2164
+ "epoch": 4.3,
2165
+ "learning_rate": 7.603192811585654e-05,
2166
+ "loss": 3.0989,
2167
+ "step": 173500
2168
+ },
2169
+ {
2170
+ "epoch": 4.31,
2171
+ "learning_rate": 7.600934789952304e-05,
2172
+ "loss": 3.1174,
2173
+ "step": 174000
2174
+ },
2175
+ {
2176
+ "epoch": 4.32,
2177
+ "learning_rate": 7.598670699177207e-05,
2178
+ "loss": 3.0884,
2179
+ "step": 174500
2180
+ },
2181
+ {
2182
+ "epoch": 4.34,
2183
+ "learning_rate": 7.596400543076339e-05,
2184
+ "loss": 2.973,
2185
+ "step": 175000
2186
+ },
2187
+ {
2188
+ "epoch": 4.35,
2189
+ "learning_rate": 7.594124325475904e-05,
2190
+ "loss": 3.0956,
2191
+ "step": 175500
2192
+ },
2193
+ {
2194
+ "epoch": 4.36,
2195
+ "learning_rate": 7.591842050212317e-05,
2196
+ "loss": 2.9274,
2197
+ "step": 176000
2198
+ },
2199
+ {
2200
+ "epoch": 4.37,
2201
+ "learning_rate": 7.589553721132205e-05,
2202
+ "loss": 3.0128,
2203
+ "step": 176500
2204
+ },
2205
+ {
2206
+ "epoch": 4.39,
2207
+ "learning_rate": 7.587259342092397e-05,
2208
+ "loss": 3.0429,
2209
+ "step": 177000
2210
+ },
2211
+ {
2212
+ "epoch": 4.4,
2213
+ "learning_rate": 7.584958916959923e-05,
2214
+ "loss": 3.0955,
2215
+ "step": 177500
2216
+ },
2217
+ {
2218
+ "epoch": 4.41,
2219
+ "learning_rate": 7.582652449611996e-05,
2220
+ "loss": 3.1124,
2221
+ "step": 178000
2222
+ },
2223
+ {
2224
+ "epoch": 4.42,
2225
+ "learning_rate": 7.58033994393602e-05,
2226
+ "loss": 2.9723,
2227
+ "step": 178500
2228
+ },
2229
+ {
2230
+ "epoch": 4.44,
2231
+ "learning_rate": 7.578021403829572e-05,
2232
+ "loss": 2.981,
2233
+ "step": 179000
2234
+ },
2235
+ {
2236
+ "epoch": 4.45,
2237
+ "learning_rate": 7.5756968332004e-05,
2238
+ "loss": 3.1174,
2239
+ "step": 179500
2240
+ },
2241
+ {
2242
+ "epoch": 4.46,
2243
+ "learning_rate": 7.57336623596642e-05,
2244
+ "loss": 3.0292,
2245
+ "step": 180000
2246
+ },
2247
+ {
2248
+ "epoch": 4.47,
2249
+ "learning_rate": 7.5710296160557e-05,
2250
+ "loss": 3.0106,
2251
+ "step": 180500
2252
+ },
2253
+ {
2254
+ "epoch": 4.49,
2255
+ "learning_rate": 7.568686977406459e-05,
2256
+ "loss": 2.9749,
2257
+ "step": 181000
2258
+ },
2259
+ {
2260
+ "epoch": 4.5,
2261
+ "learning_rate": 7.566338323967065e-05,
2262
+ "loss": 3.0591,
2263
+ "step": 181500
2264
+ },
2265
+ {
2266
+ "epoch": 4.51,
2267
+ "learning_rate": 7.563983659696022e-05,
2268
+ "loss": 3.0228,
2269
+ "step": 182000
2270
+ },
2271
+ {
2272
+ "epoch": 4.52,
2273
+ "learning_rate": 7.56162298856196e-05,
2274
+ "loss": 3.0134,
2275
+ "step": 182500
2276
+ },
2277
+ {
2278
+ "epoch": 4.54,
2279
+ "learning_rate": 7.559256314543639e-05,
2280
+ "loss": 3.0624,
2281
+ "step": 183000
2282
+ },
2283
+ {
2284
+ "epoch": 4.55,
2285
+ "learning_rate": 7.556883641629936e-05,
2286
+ "loss": 3.0205,
2287
+ "step": 183500
2288
+ },
2289
+ {
2290
+ "epoch": 4.56,
2291
+ "learning_rate": 7.554504973819835e-05,
2292
+ "loss": 3.0067,
2293
+ "step": 184000
2294
+ },
2295
+ {
2296
+ "epoch": 4.57,
2297
+ "learning_rate": 7.552120315122426e-05,
2298
+ "loss": 3.0488,
2299
+ "step": 184500
2300
+ },
2301
+ {
2302
+ "epoch": 4.59,
2303
+ "learning_rate": 7.549729669556898e-05,
2304
+ "loss": 3.0992,
2305
+ "step": 185000
2306
+ },
2307
+ {
2308
+ "epoch": 4.6,
2309
+ "learning_rate": 7.547333041152526e-05,
2310
+ "loss": 3.0137,
2311
+ "step": 185500
2312
+ },
2313
+ {
2314
+ "epoch": 4.61,
2315
+ "learning_rate": 7.544930433948676e-05,
2316
+ "loss": 3.1379,
2317
+ "step": 186000
2318
+ },
2319
+ {
2320
+ "epoch": 4.62,
2321
+ "learning_rate": 7.542521851994781e-05,
2322
+ "loss": 3.0818,
2323
+ "step": 186500
2324
+ },
2325
+ {
2326
+ "epoch": 4.63,
2327
+ "learning_rate": 7.540107299350354e-05,
2328
+ "loss": 3.0634,
2329
+ "step": 187000
2330
+ },
2331
+ {
2332
+ "epoch": 4.65,
2333
+ "learning_rate": 7.537686780084966e-05,
2334
+ "loss": 3.0984,
2335
+ "step": 187500
2336
+ },
2337
+ {
2338
+ "epoch": 4.66,
2339
+ "learning_rate": 7.53526029827824e-05,
2340
+ "loss": 3.1238,
2341
+ "step": 188000
2342
+ },
2343
+ {
2344
+ "epoch": 4.67,
2345
+ "learning_rate": 7.532827858019862e-05,
2346
+ "loss": 3.0431,
2347
+ "step": 188500
2348
+ },
2349
+ {
2350
+ "epoch": 4.68,
2351
+ "learning_rate": 7.530389463409545e-05,
2352
+ "loss": 3.0216,
2353
+ "step": 189000
2354
+ },
2355
+ {
2356
+ "epoch": 4.7,
2357
+ "learning_rate": 7.527945118557048e-05,
2358
+ "loss": 3.0448,
2359
+ "step": 189500
2360
+ },
2361
+ {
2362
+ "epoch": 4.71,
2363
+ "learning_rate": 7.525494827582155e-05,
2364
+ "loss": 3.1713,
2365
+ "step": 190000
2366
+ },
2367
+ {
2368
+ "epoch": 4.72,
2369
+ "learning_rate": 7.523038594614671e-05,
2370
+ "loss": 3.0396,
2371
+ "step": 190500
2372
+ },
2373
+ {
2374
+ "epoch": 4.73,
2375
+ "learning_rate": 7.52057642379442e-05,
2376
+ "loss": 3.1008,
2377
+ "step": 191000
2378
+ },
2379
+ {
2380
+ "epoch": 4.75,
2381
+ "learning_rate": 7.518108319271228e-05,
2382
+ "loss": 3.0965,
2383
+ "step": 191500
2384
+ },
2385
+ {
2386
+ "epoch": 4.76,
2387
+ "learning_rate": 7.515634285204928e-05,
2388
+ "loss": 3.0407,
2389
+ "step": 192000
2390
+ },
2391
+ {
2392
+ "epoch": 4.77,
2393
+ "learning_rate": 7.51315432576534e-05,
2394
+ "loss": 3.0669,
2395
+ "step": 192500
2396
+ },
2397
+ {
2398
+ "epoch": 4.78,
2399
+ "learning_rate": 7.510668445132279e-05,
2400
+ "loss": 3.0752,
2401
+ "step": 193000
2402
+ },
2403
+ {
2404
+ "epoch": 4.8,
2405
+ "learning_rate": 7.508176647495532e-05,
2406
+ "loss": 2.9414,
2407
+ "step": 193500
2408
+ },
2409
+ {
2410
+ "epoch": 4.81,
2411
+ "learning_rate": 7.505678937054863e-05,
2412
+ "loss": 3.0746,
2413
+ "step": 194000
2414
+ },
2415
+ {
2416
+ "epoch": 4.82,
2417
+ "learning_rate": 7.503175318019999e-05,
2418
+ "loss": 2.9645,
2419
+ "step": 194500
2420
+ },
2421
+ {
2422
+ "epoch": 4.83,
2423
+ "learning_rate": 7.500665794610632e-05,
2424
+ "loss": 3.0942,
2425
+ "step": 195000
2426
+ },
2427
+ {
2428
+ "epoch": 4.85,
2429
+ "learning_rate": 7.498150371056396e-05,
2430
+ "loss": 3.0586,
2431
+ "step": 195500
2432
+ },
2433
+ {
2434
+ "epoch": 4.86,
2435
+ "learning_rate": 7.495629051596876e-05,
2436
+ "loss": 3.0251,
2437
+ "step": 196000
2438
+ },
2439
+ {
2440
+ "epoch": 4.87,
2441
+ "learning_rate": 7.493101840481594e-05,
2442
+ "loss": 3.0769,
2443
+ "step": 196500
2444
+ },
2445
+ {
2446
+ "epoch": 4.88,
2447
+ "learning_rate": 7.490568741969997e-05,
2448
+ "loss": 3.0701,
2449
+ "step": 197000
2450
+ },
2451
+ {
2452
+ "epoch": 4.89,
2453
+ "learning_rate": 7.488029760331459e-05,
2454
+ "loss": 3.0893,
2455
+ "step": 197500
2456
+ },
2457
+ {
2458
+ "epoch": 4.91,
2459
+ "learning_rate": 7.48548489984527e-05,
2460
+ "loss": 3.1527,
2461
+ "step": 198000
2462
+ },
2463
+ {
2464
+ "epoch": 4.92,
2465
+ "learning_rate": 7.482934164800626e-05,
2466
+ "loss": 3.0497,
2467
+ "step": 198500
2468
+ },
2469
+ {
2470
+ "epoch": 4.93,
2471
+ "learning_rate": 7.480377559496624e-05,
2472
+ "loss": 3.0665,
2473
+ "step": 199000
2474
+ },
2475
+ {
2476
+ "epoch": 4.94,
2477
+ "learning_rate": 7.47781508824226e-05,
2478
+ "loss": 3.0842,
2479
+ "step": 199500
2480
+ },
2481
+ {
2482
+ "epoch": 4.96,
2483
+ "learning_rate": 7.47524675535641e-05,
2484
+ "loss": 3.0961,
2485
+ "step": 200000
2486
+ },
2487
+ {
2488
+ "epoch": 4.97,
2489
+ "learning_rate": 7.472672565167833e-05,
2490
+ "loss": 3.0756,
2491
+ "step": 200500
2492
+ },
2493
+ {
2494
+ "epoch": 4.98,
2495
+ "learning_rate": 7.470092522015158e-05,
2496
+ "loss": 3.1524,
2497
+ "step": 201000
2498
+ },
2499
+ {
2500
+ "epoch": 4.99,
2501
+ "learning_rate": 7.467506630246882e-05,
2502
+ "loss": 3.1614,
2503
+ "step": 201500
2504
+ },
2505
+ {
2506
+ "epoch": 5.0,
2507
+ "eval_bleu": 1.0,
2508
+ "eval_brevity_penalty": 1.0,
2509
+ "eval_length_ratio": 1.0,
2510
+ "eval_loss": 2.995251417160034,
2511
+ "eval_precisions": [
2512
+ 1.0,
2513
+ 1.0,
2514
+ 1.0,
2515
+ 1.0
2516
+ ],
2517
+ "eval_reference_length": 4591104,
2518
+ "eval_runtime": 8351.4463,
2519
+ "eval_samples_per_second": 1.074,
2520
+ "eval_steps_per_second": 0.537,
2521
+ "eval_translation_length": 4591104,
2522
+ "step": 201740
2523
  }
2524
  ],
2525
  "logging_steps": 500,
2526
  "max_steps": 1210440,
2527
  "num_train_epochs": 30,
2528
  "save_steps": 1000,
2529
+ "total_flos": 3.462342804976435e+18,
2530
  "trial_name": null,
2531
  "trial_params": null
2532
  }