dq158 committed
Commit e6488f2
1 Parent(s): afd867c

Training in progress, epoch 3, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:caa75ba462fc485764d8d1e85a765bd3b713eeb0b6b041e8f5bed4bb5d25ec75
+ oid sha256:c62c435fa96b78ad6ecac93429cba4d88f3f2af5297b739e4a5280675f4784cf
  size 1256
last-checkpoint/rng_state.pth CHANGED
Binary files a/last-checkpoint/rng_state.pth and b/last-checkpoint/rng_state.pth differ
 
last-checkpoint/scheduler.pt CHANGED
Binary files a/last-checkpoint/scheduler.pt and b/last-checkpoint/scheduler.pt differ
 
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 2.873922109603882,
  "best_model_checkpoint": "dq158/coqui/checkpoint-40162",
- "epoch": 2.0,
+ "epoch": 3.0,
  "eval_steps": 500,
- "global_step": 80324,
+ "global_step": 120486,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1005,13 +1005,512 @@
  "eval_steps_per_second": 0.568,
  "eval_translation_length": 4569600,
  "step": 80324
+ },
+ {
+ "epoch": 2.0,
+ "learning_rate": 7.913219840074152e-05,
+ "loss": 3.0052,
+ "step": 80500
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 7.912135699428588e-05,
+ "loss": 2.9191,
+ "step": 81000
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 7.91104490389186e-05,
+ "loss": 3.0454,
+ "step": 81500
+ },
+ {
+ "epoch": 2.04,
+ "learning_rate": 7.909947455319513e-05,
+ "loss": 3.0362,
+ "step": 82000
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 7.908843355578402e-05,
+ "loss": 3.016,
+ "step": 82500
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 7.907732606546699e-05,
+ "loss": 3.0735,
+ "step": 83000
+ },
+ {
+ "epoch": 2.08,
+ "learning_rate": 7.906615210113886e-05,
+ "loss": 3.0626,
+ "step": 83500
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 7.905491168180757e-05,
+ "loss": 2.9955,
+ "step": 84000
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 7.904360482659405e-05,
+ "loss": 3.0104,
+ "step": 84500
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 7.903223155473227e-05,
+ "loss": 3.0618,
+ "step": 85000
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 7.902079188556919e-05,
+ "loss": 3.0146,
+ "step": 85500
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 7.900928583856469e-05,
+ "loss": 3.0075,
+ "step": 86000
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 7.899771343329159e-05,
+ "loss": 2.9807,
+ "step": 86500
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 7.898607468943558e-05,
+ "loss": 3.0609,
+ "step": 87000
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 7.89743696267952e-05,
+ "loss": 2.9379,
+ "step": 87500
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 7.896259826528179e-05,
+ "loss": 3.0411,
+ "step": 88000
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 7.89507606249195e-05,
+ "loss": 3.0788,
+ "step": 88500
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 7.893885672584521e-05,
+ "loss": 3.115,
+ "step": 89000
+ },
+ {
+ "epoch": 2.23,
+ "learning_rate": 7.892688658830852e-05,
+ "loss": 3.0927,
+ "step": 89500
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 7.891485023267167e-05,
+ "loss": 2.9474,
+ "step": 90000
+ },
+ {
+ "epoch": 2.25,
+ "learning_rate": 7.890274767940961e-05,
+ "loss": 3.0862,
+ "step": 90500
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 7.889057894910983e-05,
+ "loss": 3.0357,
+ "step": 91000
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 7.887834406247244e-05,
+ "loss": 3.0165,
+ "step": 91500
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 7.886604304031007e-05,
+ "loss": 3.0627,
+ "step": 92000
+ },
+ {
+ "epoch": 2.3,
+ "learning_rate": 7.885367590354785e-05,
+ "loss": 3.0763,
+ "step": 92500
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 7.88412426732234e-05,
+ "loss": 3.0735,
+ "step": 93000
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 7.882874337048669e-05,
+ "loss": 2.9523,
+ "step": 93500
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 7.881617801660023e-05,
+ "loss": 2.9885,
+ "step": 94000
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 7.880354663293875e-05,
+ "loss": 2.9825,
+ "step": 94500
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 7.879084924098938e-05,
+ "loss": 2.9537,
+ "step": 95000
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 7.877808586235151e-05,
+ "loss": 3.0941,
+ "step": 95500
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 7.876525651873679e-05,
+ "loss": 3.0817,
+ "step": 96000
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 7.875236123196905e-05,
+ "loss": 3.0927,
+ "step": 96500
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 7.873940002398435e-05,
+ "loss": 2.9777,
+ "step": 97000
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 7.872637291683084e-05,
+ "loss": 3.0594,
+ "step": 97500
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 7.871327993266882e-05,
+ "loss": 3.0813,
+ "step": 98000
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 7.870012109377058e-05,
+ "loss": 3.0248,
+ "step": 98500
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 7.868689642252051e-05,
+ "loss": 2.9885,
+ "step": 99000
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 7.867360594141493e-05,
+ "loss": 3.037,
+ "step": 99500
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 7.866024967306214e-05,
+ "loss": 3.1189,
+ "step": 100000
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 7.864682764018236e-05,
+ "loss": 2.9825,
+ "step": 100500
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 7.863333986560763e-05,
+ "loss": 3.0277,
+ "step": 101000
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 7.861978637228187e-05,
+ "loss": 3.1551,
+ "step": 101500
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 7.860616718326079e-05,
+ "loss": 3.0137,
+ "step": 102000
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 7.859248232171184e-05,
+ "loss": 3.0373,
+ "step": 102500
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 7.857873181091416e-05,
+ "loss": 3.0931,
+ "step": 103000
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 7.856491567425863e-05,
+ "loss": 3.0635,
+ "step": 103500
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 7.85510339352477e-05,
+ "loss": 3.0677,
+ "step": 104000
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 7.853708661749548e-05,
+ "loss": 3.0483,
+ "step": 104500
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 7.852307374472755e-05,
+ "loss": 2.9974,
+ "step": 105000
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 7.850899534078111e-05,
+ "loss": 3.0567,
+ "step": 105500
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 7.849485142960473e-05,
+ "loss": 3.0159,
+ "step": 106000
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 7.848064203525849e-05,
+ "loss": 3.0221,
+ "step": 106500
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 7.846636718191382e-05,
+ "loss": 2.9791,
+ "step": 107000
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 7.845202689385353e-05,
+ "loss": 2.9952,
+ "step": 107500
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 7.843762119547172e-05,
+ "loss": 2.997,
+ "step": 108000
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 7.842315011127377e-05,
+ "loss": 2.9545,
+ "step": 108500
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 7.840861366587628e-05,
+ "loss": 3.0655,
+ "step": 109000
+ },
+ {
+ "epoch": 2.73,
+ "learning_rate": 7.839401188400704e-05,
+ "loss": 3.0671,
+ "step": 109500
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 7.837934479050496e-05,
+ "loss": 3.0327,
+ "step": 110000
+ },
+ {
+ "epoch": 2.75,
+ "learning_rate": 7.836461241032011e-05,
+ "loss": 3.0038,
+ "step": 110500
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 7.834981476851354e-05,
+ "loss": 3.0443,
+ "step": 111000
+ },
+ {
+ "epoch": 2.78,
+ "learning_rate": 7.83349518902574e-05,
+ "loss": 2.9654,
+ "step": 111500
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 7.832002380083474e-05,
+ "loss": 3.0335,
+ "step": 112000
+ },
+ {
+ "epoch": 2.8,
+ "learning_rate": 7.830503052563956e-05,
+ "loss": 3.0491,
+ "step": 112500
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 7.828997209017678e-05,
+ "loss": 3.0643,
+ "step": 113000
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 7.827484852006215e-05,
+ "loss": 3.1021,
+ "step": 113500
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 7.825965984102216e-05,
+ "loss": 3.0631,
+ "step": 114000
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 7.824440607889415e-05,
+ "loss": 2.9952,
+ "step": 114500
+ },
+ {
+ "epoch": 2.86,
+ "learning_rate": 7.822908725962612e-05,
+ "loss": 3.0093,
+ "step": 115000
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 7.821370340927674e-05,
+ "loss": 2.9657,
+ "step": 115500
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 7.819825455401529e-05,
+ "loss": 3.0453,
+ "step": 116000
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 7.818274072012168e-05,
+ "loss": 3.0879,
+ "step": 116500
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 7.81671619339863e-05,
+ "loss": 2.9641,
+ "step": 117000
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 7.815151822211007e-05,
+ "loss": 3.0363,
+ "step": 117500
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 7.813580961110435e-05,
+ "loss": 3.051,
+ "step": 118000
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 7.812003612769084e-05,
+ "loss": 3.0135,
+ "step": 118500
+ },
+ {
+ "epoch": 2.96,
+ "learning_rate": 7.810419779870166e-05,
+ "loss": 2.9968,
+ "step": 119000
+ },
+ {
+ "epoch": 2.98,
+ "learning_rate": 7.808829465107923e-05,
+ "loss": 3.0569,
+ "step": 119500
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 7.80723267118762e-05,
+ "loss": 3.0546,
+ "step": 120000
+ },
+ {
+ "epoch": 3.0,
+ "eval_bleu": 1.0,
+ "eval_brevity_penalty": 1.0,
+ "eval_length_ratio": 1.0,
+ "eval_loss": 2.873922109603882,
+ "eval_precisions": [
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ "eval_reference_length": 4569600,
+ "eval_runtime": 7544.7431,
+ "eval_samples_per_second": 1.183,
+ "eval_steps_per_second": 0.592,
+ "eval_translation_length": 4569600,
+ "step": 120486
  }
  ],
  "logging_steps": 500,
  "max_steps": 1204860,
  "num_train_epochs": 30,
  "save_steps": 1000,
- "total_flos": 1.3785697922643395e+18,
+ "total_flos": 2.0678546883965092e+18,
  "trial_name": null,
  "trial_params": null
  }