dq158 commited on
Commit
a3371eb
1 Parent(s): 5727974

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9500c02fbca154680c04376e725940306434e1e771ac54c7d0e0f17696e50ef2
3
  size 37789864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41d9243f7df55b50aa4775a15858d272a4ec8e8c563c0cd8ec6b9d0b3da9f8a
3
  size 37789864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1605f73694d383535828224ca8220e415442872bdcd5581f7fca283f1a4438eb
3
  size 2622266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29c84bda34e9c4f410c99669a1f937ad7a00bee4dfd64bcf19e7795e6886813d
3
  size 2622266
last-checkpoint/rng_state.pth CHANGED
Binary files a/last-checkpoint/rng_state.pth and b/last-checkpoint/rng_state.pth differ
 
last-checkpoint/scheduler.pt CHANGED
Binary files a/last-checkpoint/scheduler.pt and b/last-checkpoint/scheduler.pt differ
 
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 3.0691702365875244,
3
- "best_model_checkpoint": "dq158/coqui/checkpoint-80696",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 80696,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1011,13 +1011,518 @@
1011
  "eval_steps_per_second": 0.543,
1012
  "eval_translation_length": 4591104,
1013
  "step": 80696
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1014
  }
1015
  ],
1016
  "logging_steps": 500,
1017
  "max_steps": 1210440,
1018
  "num_train_epochs": 30,
1019
  "save_steps": 1000,
1020
- "total_flos": 1.384937121990574e+18,
1021
  "trial_name": null,
1022
  "trial_params": null
1023
  }
 
1
  {
2
+ "best_metric": 3.0360162258148193,
3
+ "best_model_checkpoint": "dq158/coqui/checkpoint-121044",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 121044,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1011
  "eval_steps_per_second": 0.543,
1012
  "eval_translation_length": 4591104,
1013
  "step": 80696
1014
+ },
1015
+ {
1016
+ "epoch": 2.01,
1017
+ "learning_rate": 7.912941309744704e-05,
1018
+ "loss": 3.1234,
1019
+ "step": 81000
1020
+ },
1021
+ {
1022
+ "epoch": 2.02,
1023
+ "learning_rate": 7.91186047830294e-05,
1024
+ "loss": 3.1118,
1025
+ "step": 81500
1026
+ },
1027
+ {
1028
+ "epoch": 2.03,
1029
+ "learning_rate": 7.910773053674371e-05,
1030
+ "loss": 3.1879,
1031
+ "step": 82000
1032
+ },
1033
+ {
1034
+ "epoch": 2.04,
1035
+ "learning_rate": 7.909679037691783e-05,
1036
+ "loss": 3.2032,
1037
+ "step": 82500
1038
+ },
1039
+ {
1040
+ "epoch": 2.06,
1041
+ "learning_rate": 7.908578432199069e-05,
1042
+ "loss": 3.1009,
1043
+ "step": 83000
1044
+ },
1045
+ {
1046
+ "epoch": 2.07,
1047
+ "learning_rate": 7.907471239051224e-05,
1048
+ "loss": 3.0688,
1049
+ "step": 83500
1050
+ },
1051
+ {
1052
+ "epoch": 2.08,
1053
+ "learning_rate": 7.906357460114355e-05,
1054
+ "loss": 3.0835,
1055
+ "step": 84000
1056
+ },
1057
+ {
1058
+ "epoch": 2.09,
1059
+ "learning_rate": 7.90523709726566e-05,
1060
+ "loss": 3.1157,
1061
+ "step": 84500
1062
+ },
1063
+ {
1064
+ "epoch": 2.11,
1065
+ "learning_rate": 7.90411015239344e-05,
1066
+ "loss": 3.0581,
1067
+ "step": 85000
1068
+ },
1069
+ {
1070
+ "epoch": 2.12,
1071
+ "learning_rate": 7.902976627397088e-05,
1072
+ "loss": 3.206,
1073
+ "step": 85500
1074
+ },
1075
+ {
1076
+ "epoch": 2.13,
1077
+ "learning_rate": 7.901836524187085e-05,
1078
+ "loss": 3.2185,
1079
+ "step": 86000
1080
+ },
1081
+ {
1082
+ "epoch": 2.14,
1083
+ "learning_rate": 7.900689844685002e-05,
1084
+ "loss": 3.1204,
1085
+ "step": 86500
1086
+ },
1087
+ {
1088
+ "epoch": 2.16,
1089
+ "learning_rate": 7.899536590823493e-05,
1090
+ "loss": 3.2066,
1091
+ "step": 87000
1092
+ },
1093
+ {
1094
+ "epoch": 2.17,
1095
+ "learning_rate": 7.898376764546293e-05,
1096
+ "loss": 3.1684,
1097
+ "step": 87500
1098
+ },
1099
+ {
1100
+ "epoch": 2.18,
1101
+ "learning_rate": 7.89721036780821e-05,
1102
+ "loss": 3.1742,
1103
+ "step": 88000
1104
+ },
1105
+ {
1106
+ "epoch": 2.19,
1107
+ "learning_rate": 7.896037402575136e-05,
1108
+ "loss": 3.1598,
1109
+ "step": 88500
1110
+ },
1111
+ {
1112
+ "epoch": 2.21,
1113
+ "learning_rate": 7.894857870824023e-05,
1114
+ "loss": 3.0966,
1115
+ "step": 89000
1116
+ },
1117
+ {
1118
+ "epoch": 2.22,
1119
+ "learning_rate": 7.893671774542899e-05,
1120
+ "loss": 3.2035,
1121
+ "step": 89500
1122
+ },
1123
+ {
1124
+ "epoch": 2.23,
1125
+ "learning_rate": 7.892479115730848e-05,
1126
+ "loss": 3.215,
1127
+ "step": 90000
1128
+ },
1129
+ {
1130
+ "epoch": 2.24,
1131
+ "learning_rate": 7.891279896398023e-05,
1132
+ "loss": 3.153,
1133
+ "step": 90500
1134
+ },
1135
+ {
1136
+ "epoch": 2.26,
1137
+ "learning_rate": 7.890074118565626e-05,
1138
+ "loss": 3.1019,
1139
+ "step": 91000
1140
+ },
1141
+ {
1142
+ "epoch": 2.27,
1143
+ "learning_rate": 7.88886178426592e-05,
1144
+ "loss": 3.1509,
1145
+ "step": 91500
1146
+ },
1147
+ {
1148
+ "epoch": 2.28,
1149
+ "learning_rate": 7.887642895542218e-05,
1150
+ "loss": 3.1699,
1151
+ "step": 92000
1152
+ },
1153
+ {
1154
+ "epoch": 2.29,
1155
+ "learning_rate": 7.886417454448872e-05,
1156
+ "loss": 3.1162,
1157
+ "step": 92500
1158
+ },
1159
+ {
1160
+ "epoch": 2.3,
1161
+ "learning_rate": 7.885185463051289e-05,
1162
+ "loss": 3.1782,
1163
+ "step": 93000
1164
+ },
1165
+ {
1166
+ "epoch": 2.32,
1167
+ "learning_rate": 7.883946923425907e-05,
1168
+ "loss": 3.1546,
1169
+ "step": 93500
1170
+ },
1171
+ {
1172
+ "epoch": 2.33,
1173
+ "learning_rate": 7.882701837660205e-05,
1174
+ "loss": 3.1551,
1175
+ "step": 94000
1176
+ },
1177
+ {
1178
+ "epoch": 2.34,
1179
+ "learning_rate": 7.881450207852696e-05,
1180
+ "loss": 3.1327,
1181
+ "step": 94500
1182
+ },
1183
+ {
1184
+ "epoch": 2.35,
1185
+ "learning_rate": 7.880192036112917e-05,
1186
+ "loss": 3.0316,
1187
+ "step": 95000
1188
+ },
1189
+ {
1190
+ "epoch": 2.37,
1191
+ "learning_rate": 7.878927324561437e-05,
1192
+ "loss": 3.1897,
1193
+ "step": 95500
1194
+ },
1195
+ {
1196
+ "epoch": 2.38,
1197
+ "learning_rate": 7.877656075329846e-05,
1198
+ "loss": 3.1447,
1199
+ "step": 96000
1200
+ },
1201
+ {
1202
+ "epoch": 2.39,
1203
+ "learning_rate": 7.876378290560751e-05,
1204
+ "loss": 3.1764,
1205
+ "step": 96500
1206
+ },
1207
+ {
1208
+ "epoch": 2.4,
1209
+ "learning_rate": 7.875093972407774e-05,
1210
+ "loss": 3.1213,
1211
+ "step": 97000
1212
+ },
1213
+ {
1214
+ "epoch": 2.42,
1215
+ "learning_rate": 7.873803123035553e-05,
1216
+ "loss": 3.0774,
1217
+ "step": 97500
1218
+ },
1219
+ {
1220
+ "epoch": 2.43,
1221
+ "learning_rate": 7.872505744619728e-05,
1222
+ "loss": 3.1127,
1223
+ "step": 98000
1224
+ },
1225
+ {
1226
+ "epoch": 2.44,
1227
+ "learning_rate": 7.871201839346947e-05,
1228
+ "loss": 3.2621,
1229
+ "step": 98500
1230
+ },
1231
+ {
1232
+ "epoch": 2.45,
1233
+ "learning_rate": 7.869891409414858e-05,
1234
+ "loss": 3.2113,
1235
+ "step": 99000
1236
+ },
1237
+ {
1238
+ "epoch": 2.47,
1239
+ "learning_rate": 7.868574457032106e-05,
1240
+ "loss": 3.1218,
1241
+ "step": 99500
1242
+ },
1243
+ {
1244
+ "epoch": 2.48,
1245
+ "learning_rate": 7.867250984418328e-05,
1246
+ "loss": 3.1409,
1247
+ "step": 100000
1248
+ },
1249
+ {
1250
+ "epoch": 2.49,
1251
+ "learning_rate": 7.86592099380415e-05,
1252
+ "loss": 3.073,
1253
+ "step": 100500
1254
+ },
1255
+ {
1256
+ "epoch": 2.5,
1257
+ "learning_rate": 7.864584487431186e-05,
1258
+ "loss": 3.1624,
1259
+ "step": 101000
1260
+ },
1261
+ {
1262
+ "epoch": 2.52,
1263
+ "learning_rate": 7.863241467552032e-05,
1264
+ "loss": 3.2052,
1265
+ "step": 101500
1266
+ },
1267
+ {
1268
+ "epoch": 2.53,
1269
+ "learning_rate": 7.861891936430258e-05,
1270
+ "loss": 3.1714,
1271
+ "step": 102000
1272
+ },
1273
+ {
1274
+ "epoch": 2.54,
1275
+ "learning_rate": 7.860535896340414e-05,
1276
+ "loss": 3.1728,
1277
+ "step": 102500
1278
+ },
1279
+ {
1280
+ "epoch": 2.55,
1281
+ "learning_rate": 7.859173349568015e-05,
1282
+ "loss": 3.1564,
1283
+ "step": 103000
1284
+ },
1285
+ {
1286
+ "epoch": 2.57,
1287
+ "learning_rate": 7.857804298409547e-05,
1288
+ "loss": 3.0868,
1289
+ "step": 103500
1290
+ },
1291
+ {
1292
+ "epoch": 2.58,
1293
+ "learning_rate": 7.856428745172455e-05,
1294
+ "loss": 3.1485,
1295
+ "step": 104000
1296
+ },
1297
+ {
1298
+ "epoch": 2.59,
1299
+ "learning_rate": 7.855046692175145e-05,
1300
+ "loss": 3.2001,
1301
+ "step": 104500
1302
+ },
1303
+ {
1304
+ "epoch": 2.6,
1305
+ "learning_rate": 7.853658141746979e-05,
1306
+ "loss": 3.0651,
1307
+ "step": 105000
1308
+ },
1309
+ {
1310
+ "epoch": 2.61,
1311
+ "learning_rate": 7.852263096228267e-05,
1312
+ "loss": 3.1709,
1313
+ "step": 105500
1314
+ },
1315
+ {
1316
+ "epoch": 2.63,
1317
+ "learning_rate": 7.850861557970269e-05,
1318
+ "loss": 3.1635,
1319
+ "step": 106000
1320
+ },
1321
+ {
1322
+ "epoch": 2.64,
1323
+ "learning_rate": 7.849453529335188e-05,
1324
+ "loss": 3.1123,
1325
+ "step": 106500
1326
+ },
1327
+ {
1328
+ "epoch": 2.65,
1329
+ "learning_rate": 7.848039012696161e-05,
1330
+ "loss": 3.1162,
1331
+ "step": 107000
1332
+ },
1333
+ {
1334
+ "epoch": 2.66,
1335
+ "learning_rate": 7.846618010437265e-05,
1336
+ "loss": 3.1275,
1337
+ "step": 107500
1338
+ },
1339
+ {
1340
+ "epoch": 2.68,
1341
+ "learning_rate": 7.84519052495351e-05,
1342
+ "loss": 3.1035,
1343
+ "step": 108000
1344
+ },
1345
+ {
1346
+ "epoch": 2.69,
1347
+ "learning_rate": 7.843756558650827e-05,
1348
+ "loss": 3.0308,
1349
+ "step": 108500
1350
+ },
1351
+ {
1352
+ "epoch": 2.7,
1353
+ "learning_rate": 7.842316113946073e-05,
1354
+ "loss": 3.1311,
1355
+ "step": 109000
1356
+ },
1357
+ {
1358
+ "epoch": 2.71,
1359
+ "learning_rate": 7.840869193267027e-05,
1360
+ "loss": 3.1477,
1361
+ "step": 109500
1362
+ },
1363
+ {
1364
+ "epoch": 2.73,
1365
+ "learning_rate": 7.839415799052378e-05,
1366
+ "loss": 3.1221,
1367
+ "step": 110000
1368
+ },
1369
+ {
1370
+ "epoch": 2.74,
1371
+ "learning_rate": 7.837955933751725e-05,
1372
+ "loss": 3.1322,
1373
+ "step": 110500
1374
+ },
1375
+ {
1376
+ "epoch": 2.75,
1377
+ "learning_rate": 7.83648959982558e-05,
1378
+ "loss": 3.1488,
1379
+ "step": 111000
1380
+ },
1381
+ {
1382
+ "epoch": 2.76,
1383
+ "learning_rate": 7.835016799745353e-05,
1384
+ "loss": 3.0619,
1385
+ "step": 111500
1386
+ },
1387
+ {
1388
+ "epoch": 2.78,
1389
+ "learning_rate": 7.833537535993351e-05,
1390
+ "loss": 3.237,
1391
+ "step": 112000
1392
+ },
1393
+ {
1394
+ "epoch": 2.79,
1395
+ "learning_rate": 7.83205181106278e-05,
1396
+ "loss": 3.1022,
1397
+ "step": 112500
1398
+ },
1399
+ {
1400
+ "epoch": 2.8,
1401
+ "learning_rate": 7.83055962745773e-05,
1402
+ "loss": 3.066,
1403
+ "step": 113000
1404
+ },
1405
+ {
1406
+ "epoch": 2.81,
1407
+ "learning_rate": 7.829060987693179e-05,
1408
+ "loss": 3.1903,
1409
+ "step": 113500
1410
+ },
1411
+ {
1412
+ "epoch": 2.83,
1413
+ "learning_rate": 7.827555894294991e-05,
1414
+ "loss": 3.2208,
1415
+ "step": 114000
1416
+ },
1417
+ {
1418
+ "epoch": 2.84,
1419
+ "learning_rate": 7.8260443497999e-05,
1420
+ "loss": 3.2005,
1421
+ "step": 114500
1422
+ },
1423
+ {
1424
+ "epoch": 2.85,
1425
+ "learning_rate": 7.824526356755516e-05,
1426
+ "loss": 3.184,
1427
+ "step": 115000
1428
+ },
1429
+ {
1430
+ "epoch": 2.86,
1431
+ "learning_rate": 7.82300191772032e-05,
1432
+ "loss": 3.1185,
1433
+ "step": 115500
1434
+ },
1435
+ {
1436
+ "epoch": 2.87,
1437
+ "learning_rate": 7.821471035263653e-05,
1438
+ "loss": 3.1077,
1439
+ "step": 116000
1440
+ },
1441
+ {
1442
+ "epoch": 2.89,
1443
+ "learning_rate": 7.819933711965718e-05,
1444
+ "loss": 3.1901,
1445
+ "step": 116500
1446
+ },
1447
+ {
1448
+ "epoch": 2.9,
1449
+ "learning_rate": 7.818389950417574e-05,
1450
+ "loss": 3.1149,
1451
+ "step": 117000
1452
+ },
1453
+ {
1454
+ "epoch": 2.91,
1455
+ "learning_rate": 7.816839753221132e-05,
1456
+ "loss": 3.1393,
1457
+ "step": 117500
1458
+ },
1459
+ {
1460
+ "epoch": 2.92,
1461
+ "learning_rate": 7.815283122989147e-05,
1462
+ "loss": 3.0862,
1463
+ "step": 118000
1464
+ },
1465
+ {
1466
+ "epoch": 2.94,
1467
+ "learning_rate": 7.813720062345219e-05,
1468
+ "loss": 3.072,
1469
+ "step": 118500
1470
+ },
1471
+ {
1472
+ "epoch": 2.95,
1473
+ "learning_rate": 7.812150573923785e-05,
1474
+ "loss": 3.1103,
1475
+ "step": 119000
1476
+ },
1477
+ {
1478
+ "epoch": 2.96,
1479
+ "learning_rate": 7.810574660370116e-05,
1480
+ "loss": 3.2114,
1481
+ "step": 119500
1482
+ },
1483
+ {
1484
+ "epoch": 2.97,
1485
+ "learning_rate": 7.808992324340312e-05,
1486
+ "loss": 3.1425,
1487
+ "step": 120000
1488
+ },
1489
+ {
1490
+ "epoch": 2.99,
1491
+ "learning_rate": 7.807403568501297e-05,
1492
+ "loss": 3.1051,
1493
+ "step": 120500
1494
+ },
1495
+ {
1496
+ "epoch": 3.0,
1497
+ "learning_rate": 7.805808395530816e-05,
1498
+ "loss": 3.1355,
1499
+ "step": 121000
1500
+ },
1501
+ {
1502
+ "epoch": 3.0,
1503
+ "eval_bleu": 1.0,
1504
+ "eval_brevity_penalty": 1.0,
1505
+ "eval_length_ratio": 1.0,
1506
+ "eval_loss": 3.0360162258148193,
1507
+ "eval_precisions": [
1508
+ 1.0,
1509
+ 1.0,
1510
+ 1.0,
1511
+ 1.0
1512
+ ],
1513
+ "eval_reference_length": 4591104,
1514
+ "eval_runtime": 8117.2024,
1515
+ "eval_samples_per_second": 1.105,
1516
+ "eval_steps_per_second": 0.552,
1517
+ "eval_translation_length": 4591104,
1518
+ "step": 121044
1519
  }
1520
  ],
1521
  "logging_steps": 500,
1522
  "max_steps": 1210440,
1523
  "num_train_epochs": 30,
1524
  "save_steps": 1000,
1525
+ "total_flos": 2.077405682985861e+18,
1526
  "trial_name": null,
1527
  "trial_params": null
1528
  }