abdiharyadi commited on
Commit
37e0d02
1 Parent(s): fc33ab4

Training in progress, step 7200, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28549ff71b23ba106f59a24bd8bca3d46371ca6d6b5cadd178cabad916347ab0
3
  size 1575259780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3fad6a2bd8c0561496dc3383467f8817292411148c9873036bdb69e35b5543d
3
  size 1575259780
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c34ed2ea0e044ab189b8a9c9cc0334a52864a1c91e23fb051e7c5f78f603426
3
  size 3150397656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2cff55d576f8616b2e2cecc6803e8b473e51aac0379039756299682d060075c
3
  size 3150397656
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9932d8abe3b276d71cedc1c0668f5337155e46aef28bb74224ba56f1f83303fe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d43647c33bad1e742cdbb1fe16026c6964f3f2731dbafc990c955d70accc926b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8da6ecbf3764caecedc3b76c95190204f7fb2ff7ab091e09448777ff201764c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72d5d233f2b2864a95b1df30795943c1ca367974d3ff0da03626224e18769293
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 41.1359,
3
- "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-3600",
4
- "epoch": 39.735099337748345,
5
  "eval_steps": 3600,
6
- "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1103,6 +1103,1096 @@
1103
  "eval_samples_per_second": 7.596,
1104
  "eval_steps_per_second": 1.519,
1105
  "step": 3600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1106
  }
1107
  ],
1108
  "logging_steps": 20,
@@ -1122,7 +2212,7 @@
1122
  "attributes": {}
1123
  }
1124
  },
1125
- "total_flos": 1.0641987012722688e+16,
1126
  "train_batch_size": 5,
1127
  "trial_name": null,
1128
  "trial_params": null
 
1
  {
2
+ "best_metric": 44.4125,
3
+ "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-7200",
4
+ "epoch": 79.47019867549669,
5
  "eval_steps": 3600,
6
+ "global_step": 7200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1103
  "eval_samples_per_second": 7.596,
1104
  "eval_steps_per_second": 1.519,
1105
  "step": 3600
1106
+ },
1107
+ {
1108
+ "epoch": 39.95584988962472,
1109
+ "learning_rate": 9.46376306620209e-07,
1110
+ "loss": 1.6224,
1111
+ "step": 3620
1112
+ },
1113
+ {
1114
+ "epoch": 40.1766004415011,
1115
+ "learning_rate": 9.460627177700348e-07,
1116
+ "loss": 1.5808,
1117
+ "step": 3640
1118
+ },
1119
+ {
1120
+ "epoch": 40.397350993377486,
1121
+ "learning_rate": 9.457491289198605e-07,
1122
+ "loss": 1.6187,
1123
+ "step": 3660
1124
+ },
1125
+ {
1126
+ "epoch": 40.618101545253865,
1127
+ "learning_rate": 9.454355400696864e-07,
1128
+ "loss": 1.6131,
1129
+ "step": 3680
1130
+ },
1131
+ {
1132
+ "epoch": 40.83885209713024,
1133
+ "learning_rate": 9.451219512195122e-07,
1134
+ "loss": 1.603,
1135
+ "step": 3700
1136
+ },
1137
+ {
1138
+ "epoch": 41.05960264900662,
1139
+ "learning_rate": 9.448083623693379e-07,
1140
+ "loss": 1.6253,
1141
+ "step": 3720
1142
+ },
1143
+ {
1144
+ "epoch": 41.280353200883,
1145
+ "learning_rate": 9.444947735191638e-07,
1146
+ "loss": 1.6125,
1147
+ "step": 3740
1148
+ },
1149
+ {
1150
+ "epoch": 41.501103752759384,
1151
+ "learning_rate": 9.441811846689895e-07,
1152
+ "loss": 1.5777,
1153
+ "step": 3760
1154
+ },
1155
+ {
1156
+ "epoch": 41.72185430463576,
1157
+ "learning_rate": 9.438675958188153e-07,
1158
+ "loss": 1.5642,
1159
+ "step": 3780
1160
+ },
1161
+ {
1162
+ "epoch": 41.94260485651214,
1163
+ "learning_rate": 9.43554006968641e-07,
1164
+ "loss": 1.5773,
1165
+ "step": 3800
1166
+ },
1167
+ {
1168
+ "epoch": 42.16335540838852,
1169
+ "learning_rate": 9.432404181184669e-07,
1170
+ "loss": 1.5218,
1171
+ "step": 3820
1172
+ },
1173
+ {
1174
+ "epoch": 42.384105960264904,
1175
+ "learning_rate": 9.429268292682926e-07,
1176
+ "loss": 1.5751,
1177
+ "step": 3840
1178
+ },
1179
+ {
1180
+ "epoch": 42.60485651214128,
1181
+ "learning_rate": 9.426132404181184e-07,
1182
+ "loss": 1.5686,
1183
+ "step": 3860
1184
+ },
1185
+ {
1186
+ "epoch": 42.82560706401766,
1187
+ "learning_rate": 9.422996515679442e-07,
1188
+ "loss": 1.5917,
1189
+ "step": 3880
1190
+ },
1191
+ {
1192
+ "epoch": 43.04635761589404,
1193
+ "learning_rate": 9.4198606271777e-07,
1194
+ "loss": 1.5427,
1195
+ "step": 3900
1196
+ },
1197
+ {
1198
+ "epoch": 43.26710816777042,
1199
+ "learning_rate": 9.416724738675958e-07,
1200
+ "loss": 1.6199,
1201
+ "step": 3920
1202
+ },
1203
+ {
1204
+ "epoch": 43.4878587196468,
1205
+ "learning_rate": 9.413588850174215e-07,
1206
+ "loss": 1.5946,
1207
+ "step": 3940
1208
+ },
1209
+ {
1210
+ "epoch": 43.70860927152318,
1211
+ "learning_rate": 9.410452961672474e-07,
1212
+ "loss": 1.6212,
1213
+ "step": 3960
1214
+ },
1215
+ {
1216
+ "epoch": 43.92935982339956,
1217
+ "learning_rate": 9.407317073170731e-07,
1218
+ "loss": 1.5569,
1219
+ "step": 3980
1220
+ },
1221
+ {
1222
+ "epoch": 44.150110375275936,
1223
+ "learning_rate": 9.404181184668989e-07,
1224
+ "loss": 1.521,
1225
+ "step": 4000
1226
+ },
1227
+ {
1228
+ "epoch": 44.370860927152314,
1229
+ "learning_rate": 9.401045296167247e-07,
1230
+ "loss": 1.56,
1231
+ "step": 4020
1232
+ },
1233
+ {
1234
+ "epoch": 44.5916114790287,
1235
+ "learning_rate": 9.397909407665504e-07,
1236
+ "loss": 1.6059,
1237
+ "step": 4040
1238
+ },
1239
+ {
1240
+ "epoch": 44.81236203090508,
1241
+ "learning_rate": 9.394773519163763e-07,
1242
+ "loss": 1.5684,
1243
+ "step": 4060
1244
+ },
1245
+ {
1246
+ "epoch": 45.033112582781456,
1247
+ "learning_rate": 9.39163763066202e-07,
1248
+ "loss": 1.539,
1249
+ "step": 4080
1250
+ },
1251
+ {
1252
+ "epoch": 45.253863134657834,
1253
+ "learning_rate": 9.388501742160278e-07,
1254
+ "loss": 1.5336,
1255
+ "step": 4100
1256
+ },
1257
+ {
1258
+ "epoch": 45.47461368653422,
1259
+ "learning_rate": 9.385365853658536e-07,
1260
+ "loss": 1.5521,
1261
+ "step": 4120
1262
+ },
1263
+ {
1264
+ "epoch": 45.6953642384106,
1265
+ "learning_rate": 9.382229965156794e-07,
1266
+ "loss": 1.5281,
1267
+ "step": 4140
1268
+ },
1269
+ {
1270
+ "epoch": 45.916114790286976,
1271
+ "learning_rate": 9.379094076655052e-07,
1272
+ "loss": 1.6375,
1273
+ "step": 4160
1274
+ },
1275
+ {
1276
+ "epoch": 46.136865342163354,
1277
+ "learning_rate": 9.375958188153309e-07,
1278
+ "loss": 1.5615,
1279
+ "step": 4180
1280
+ },
1281
+ {
1282
+ "epoch": 46.35761589403973,
1283
+ "learning_rate": 9.372822299651568e-07,
1284
+ "loss": 1.5181,
1285
+ "step": 4200
1286
+ },
1287
+ {
1288
+ "epoch": 46.57836644591612,
1289
+ "learning_rate": 9.369686411149824e-07,
1290
+ "loss": 1.6009,
1291
+ "step": 4220
1292
+ },
1293
+ {
1294
+ "epoch": 46.799116997792495,
1295
+ "learning_rate": 9.366550522648083e-07,
1296
+ "loss": 1.5895,
1297
+ "step": 4240
1298
+ },
1299
+ {
1300
+ "epoch": 47.019867549668874,
1301
+ "learning_rate": 9.363414634146342e-07,
1302
+ "loss": 1.5159,
1303
+ "step": 4260
1304
+ },
1305
+ {
1306
+ "epoch": 47.24061810154525,
1307
+ "learning_rate": 9.360278745644599e-07,
1308
+ "loss": 1.5179,
1309
+ "step": 4280
1310
+ },
1311
+ {
1312
+ "epoch": 47.46136865342164,
1313
+ "learning_rate": 9.357142857142857e-07,
1314
+ "loss": 1.5585,
1315
+ "step": 4300
1316
+ },
1317
+ {
1318
+ "epoch": 47.682119205298015,
1319
+ "learning_rate": 9.354006968641114e-07,
1320
+ "loss": 1.6147,
1321
+ "step": 4320
1322
+ },
1323
+ {
1324
+ "epoch": 47.90286975717439,
1325
+ "learning_rate": 9.350871080139373e-07,
1326
+ "loss": 1.5307,
1327
+ "step": 4340
1328
+ },
1329
+ {
1330
+ "epoch": 48.12362030905077,
1331
+ "learning_rate": 9.34773519163763e-07,
1332
+ "loss": 1.5673,
1333
+ "step": 4360
1334
+ },
1335
+ {
1336
+ "epoch": 48.34437086092715,
1337
+ "learning_rate": 9.344599303135888e-07,
1338
+ "loss": 1.5442,
1339
+ "step": 4380
1340
+ },
1341
+ {
1342
+ "epoch": 48.565121412803535,
1343
+ "learning_rate": 9.341463414634146e-07,
1344
+ "loss": 1.5177,
1345
+ "step": 4400
1346
+ },
1347
+ {
1348
+ "epoch": 48.78587196467991,
1349
+ "learning_rate": 9.338327526132404e-07,
1350
+ "loss": 1.5909,
1351
+ "step": 4420
1352
+ },
1353
+ {
1354
+ "epoch": 49.00662251655629,
1355
+ "learning_rate": 9.335191637630661e-07,
1356
+ "loss": 1.5565,
1357
+ "step": 4440
1358
+ },
1359
+ {
1360
+ "epoch": 49.22737306843267,
1361
+ "learning_rate": 9.332055749128919e-07,
1362
+ "loss": 1.4945,
1363
+ "step": 4460
1364
+ },
1365
+ {
1366
+ "epoch": 49.44812362030905,
1367
+ "learning_rate": 9.328919860627177e-07,
1368
+ "loss": 1.5253,
1369
+ "step": 4480
1370
+ },
1371
+ {
1372
+ "epoch": 49.66887417218543,
1373
+ "learning_rate": 9.325783972125436e-07,
1374
+ "loss": 1.563,
1375
+ "step": 4500
1376
+ },
1377
+ {
1378
+ "epoch": 49.88962472406181,
1379
+ "learning_rate": 9.322648083623693e-07,
1380
+ "loss": 1.6016,
1381
+ "step": 4520
1382
+ },
1383
+ {
1384
+ "epoch": 50.11037527593819,
1385
+ "learning_rate": 9.319512195121951e-07,
1386
+ "loss": 1.6032,
1387
+ "step": 4540
1388
+ },
1389
+ {
1390
+ "epoch": 50.33112582781457,
1391
+ "learning_rate": 9.316376306620209e-07,
1392
+ "loss": 1.5038,
1393
+ "step": 4560
1394
+ },
1395
+ {
1396
+ "epoch": 50.55187637969095,
1397
+ "learning_rate": 9.313240418118467e-07,
1398
+ "loss": 1.545,
1399
+ "step": 4580
1400
+ },
1401
+ {
1402
+ "epoch": 50.77262693156733,
1403
+ "learning_rate": 9.310104529616724e-07,
1404
+ "loss": 1.5676,
1405
+ "step": 4600
1406
+ },
1407
+ {
1408
+ "epoch": 50.99337748344371,
1409
+ "learning_rate": 9.306968641114981e-07,
1410
+ "loss": 1.5443,
1411
+ "step": 4620
1412
+ },
1413
+ {
1414
+ "epoch": 51.21412803532009,
1415
+ "learning_rate": 9.303832752613241e-07,
1416
+ "loss": 1.5608,
1417
+ "step": 4640
1418
+ },
1419
+ {
1420
+ "epoch": 51.434878587196465,
1421
+ "learning_rate": 9.300696864111497e-07,
1422
+ "loss": 1.551,
1423
+ "step": 4660
1424
+ },
1425
+ {
1426
+ "epoch": 51.65562913907285,
1427
+ "learning_rate": 9.297560975609756e-07,
1428
+ "loss": 1.5556,
1429
+ "step": 4680
1430
+ },
1431
+ {
1432
+ "epoch": 51.87637969094923,
1433
+ "learning_rate": 9.294425087108013e-07,
1434
+ "loss": 1.5517,
1435
+ "step": 4700
1436
+ },
1437
+ {
1438
+ "epoch": 52.09713024282561,
1439
+ "learning_rate": 9.291289198606272e-07,
1440
+ "loss": 1.5384,
1441
+ "step": 4720
1442
+ },
1443
+ {
1444
+ "epoch": 52.317880794701985,
1445
+ "learning_rate": 9.288153310104528e-07,
1446
+ "loss": 1.4869,
1447
+ "step": 4740
1448
+ },
1449
+ {
1450
+ "epoch": 52.53863134657836,
1451
+ "learning_rate": 9.285017421602787e-07,
1452
+ "loss": 1.5221,
1453
+ "step": 4760
1454
+ },
1455
+ {
1456
+ "epoch": 52.75938189845475,
1457
+ "learning_rate": 9.281881533101046e-07,
1458
+ "loss": 1.5883,
1459
+ "step": 4780
1460
+ },
1461
+ {
1462
+ "epoch": 52.980132450331126,
1463
+ "learning_rate": 9.278745644599303e-07,
1464
+ "loss": 1.5276,
1465
+ "step": 4800
1466
+ },
1467
+ {
1468
+ "epoch": 53.200883002207505,
1469
+ "learning_rate": 9.275609756097561e-07,
1470
+ "loss": 1.4969,
1471
+ "step": 4820
1472
+ },
1473
+ {
1474
+ "epoch": 53.42163355408388,
1475
+ "learning_rate": 9.272473867595818e-07,
1476
+ "loss": 1.5043,
1477
+ "step": 4840
1478
+ },
1479
+ {
1480
+ "epoch": 53.64238410596027,
1481
+ "learning_rate": 9.269337979094077e-07,
1482
+ "loss": 1.5205,
1483
+ "step": 4860
1484
+ },
1485
+ {
1486
+ "epoch": 53.863134657836646,
1487
+ "learning_rate": 9.266202090592334e-07,
1488
+ "loss": 1.4685,
1489
+ "step": 4880
1490
+ },
1491
+ {
1492
+ "epoch": 54.083885209713024,
1493
+ "learning_rate": 9.263066202090592e-07,
1494
+ "loss": 1.5099,
1495
+ "step": 4900
1496
+ },
1497
+ {
1498
+ "epoch": 54.3046357615894,
1499
+ "learning_rate": 9.25993031358885e-07,
1500
+ "loss": 1.531,
1501
+ "step": 4920
1502
+ },
1503
+ {
1504
+ "epoch": 54.52538631346578,
1505
+ "learning_rate": 9.256794425087108e-07,
1506
+ "loss": 1.5637,
1507
+ "step": 4940
1508
+ },
1509
+ {
1510
+ "epoch": 54.746136865342166,
1511
+ "learning_rate": 9.253658536585365e-07,
1512
+ "loss": 1.5795,
1513
+ "step": 4960
1514
+ },
1515
+ {
1516
+ "epoch": 54.966887417218544,
1517
+ "learning_rate": 9.250522648083623e-07,
1518
+ "loss": 1.473,
1519
+ "step": 4980
1520
+ },
1521
+ {
1522
+ "epoch": 55.18763796909492,
1523
+ "learning_rate": 9.247386759581881e-07,
1524
+ "loss": 1.4921,
1525
+ "step": 5000
1526
+ },
1527
+ {
1528
+ "epoch": 55.4083885209713,
1529
+ "learning_rate": 9.244250871080139e-07,
1530
+ "loss": 1.5147,
1531
+ "step": 5020
1532
+ },
1533
+ {
1534
+ "epoch": 55.629139072847686,
1535
+ "learning_rate": 9.241114982578397e-07,
1536
+ "loss": 1.5259,
1537
+ "step": 5040
1538
+ },
1539
+ {
1540
+ "epoch": 55.849889624724064,
1541
+ "learning_rate": 9.237979094076655e-07,
1542
+ "loss": 1.5481,
1543
+ "step": 5060
1544
+ },
1545
+ {
1546
+ "epoch": 56.07064017660044,
1547
+ "learning_rate": 9.234843205574913e-07,
1548
+ "loss": 1.5238,
1549
+ "step": 5080
1550
+ },
1551
+ {
1552
+ "epoch": 56.29139072847682,
1553
+ "learning_rate": 9.23170731707317e-07,
1554
+ "loss": 1.5497,
1555
+ "step": 5100
1556
+ },
1557
+ {
1558
+ "epoch": 56.5121412803532,
1559
+ "learning_rate": 9.228571428571428e-07,
1560
+ "loss": 1.5003,
1561
+ "step": 5120
1562
+ },
1563
+ {
1564
+ "epoch": 56.73289183222958,
1565
+ "learning_rate": 9.225435540069686e-07,
1566
+ "loss": 1.511,
1567
+ "step": 5140
1568
+ },
1569
+ {
1570
+ "epoch": 56.95364238410596,
1571
+ "learning_rate": 9.222299651567944e-07,
1572
+ "loss": 1.5426,
1573
+ "step": 5160
1574
+ },
1575
+ {
1576
+ "epoch": 57.17439293598234,
1577
+ "learning_rate": 9.219163763066201e-07,
1578
+ "loss": 1.4937,
1579
+ "step": 5180
1580
+ },
1581
+ {
1582
+ "epoch": 57.39514348785872,
1583
+ "learning_rate": 9.216027874564459e-07,
1584
+ "loss": 1.5059,
1585
+ "step": 5200
1586
+ },
1587
+ {
1588
+ "epoch": 57.615894039735096,
1589
+ "learning_rate": 9.212891986062717e-07,
1590
+ "loss": 1.548,
1591
+ "step": 5220
1592
+ },
1593
+ {
1594
+ "epoch": 57.83664459161148,
1595
+ "learning_rate": 9.209756097560976e-07,
1596
+ "loss": 1.4691,
1597
+ "step": 5240
1598
+ },
1599
+ {
1600
+ "epoch": 58.05739514348786,
1601
+ "learning_rate": 9.206620209059232e-07,
1602
+ "loss": 1.5459,
1603
+ "step": 5260
1604
+ },
1605
+ {
1606
+ "epoch": 58.27814569536424,
1607
+ "learning_rate": 9.203484320557491e-07,
1608
+ "loss": 1.5569,
1609
+ "step": 5280
1610
+ },
1611
+ {
1612
+ "epoch": 58.498896247240616,
1613
+ "learning_rate": 9.200348432055748e-07,
1614
+ "loss": 1.4634,
1615
+ "step": 5300
1616
+ },
1617
+ {
1618
+ "epoch": 58.719646799117,
1619
+ "learning_rate": 9.197212543554007e-07,
1620
+ "loss": 1.4567,
1621
+ "step": 5320
1622
+ },
1623
+ {
1624
+ "epoch": 58.94039735099338,
1625
+ "learning_rate": 9.194076655052265e-07,
1626
+ "loss": 1.5349,
1627
+ "step": 5340
1628
+ },
1629
+ {
1630
+ "epoch": 59.16114790286976,
1631
+ "learning_rate": 9.190940766550522e-07,
1632
+ "loss": 1.4706,
1633
+ "step": 5360
1634
+ },
1635
+ {
1636
+ "epoch": 59.381898454746135,
1637
+ "learning_rate": 9.187804878048781e-07,
1638
+ "loss": 1.4932,
1639
+ "step": 5380
1640
+ },
1641
+ {
1642
+ "epoch": 59.602649006622514,
1643
+ "learning_rate": 9.184668989547037e-07,
1644
+ "loss": 1.5204,
1645
+ "step": 5400
1646
+ },
1647
+ {
1648
+ "epoch": 59.8233995584989,
1649
+ "learning_rate": 9.181533101045296e-07,
1650
+ "loss": 1.5267,
1651
+ "step": 5420
1652
+ },
1653
+ {
1654
+ "epoch": 60.04415011037528,
1655
+ "learning_rate": 9.178397212543552e-07,
1656
+ "loss": 1.5275,
1657
+ "step": 5440
1658
+ },
1659
+ {
1660
+ "epoch": 60.264900662251655,
1661
+ "learning_rate": 9.175261324041812e-07,
1662
+ "loss": 1.5116,
1663
+ "step": 5460
1664
+ },
1665
+ {
1666
+ "epoch": 60.48565121412803,
1667
+ "learning_rate": 9.172125435540069e-07,
1668
+ "loss": 1.4686,
1669
+ "step": 5480
1670
+ },
1671
+ {
1672
+ "epoch": 60.70640176600442,
1673
+ "learning_rate": 9.168989547038327e-07,
1674
+ "loss": 1.4902,
1675
+ "step": 5500
1676
+ },
1677
+ {
1678
+ "epoch": 60.9271523178808,
1679
+ "learning_rate": 9.165853658536585e-07,
1680
+ "loss": 1.4856,
1681
+ "step": 5520
1682
+ },
1683
+ {
1684
+ "epoch": 61.147902869757175,
1685
+ "learning_rate": 9.162717770034843e-07,
1686
+ "loss": 1.4982,
1687
+ "step": 5540
1688
+ },
1689
+ {
1690
+ "epoch": 61.36865342163355,
1691
+ "learning_rate": 9.1595818815331e-07,
1692
+ "loss": 1.4829,
1693
+ "step": 5560
1694
+ },
1695
+ {
1696
+ "epoch": 61.58940397350993,
1697
+ "learning_rate": 9.156445993031358e-07,
1698
+ "loss": 1.5049,
1699
+ "step": 5580
1700
+ },
1701
+ {
1702
+ "epoch": 61.81015452538632,
1703
+ "learning_rate": 9.153310104529617e-07,
1704
+ "loss": 1.5125,
1705
+ "step": 5600
1706
+ },
1707
+ {
1708
+ "epoch": 62.030905077262695,
1709
+ "learning_rate": 9.150174216027874e-07,
1710
+ "loss": 1.4746,
1711
+ "step": 5620
1712
+ },
1713
+ {
1714
+ "epoch": 62.25165562913907,
1715
+ "learning_rate": 9.147038327526132e-07,
1716
+ "loss": 1.5128,
1717
+ "step": 5640
1718
+ },
1719
+ {
1720
+ "epoch": 62.47240618101545,
1721
+ "learning_rate": 9.14390243902439e-07,
1722
+ "loss": 1.4799,
1723
+ "step": 5660
1724
+ },
1725
+ {
1726
+ "epoch": 62.69315673289183,
1727
+ "learning_rate": 9.140766550522648e-07,
1728
+ "loss": 1.4395,
1729
+ "step": 5680
1730
+ },
1731
+ {
1732
+ "epoch": 62.913907284768214,
1733
+ "learning_rate": 9.137630662020905e-07,
1734
+ "loss": 1.5277,
1735
+ "step": 5700
1736
+ },
1737
+ {
1738
+ "epoch": 63.13465783664459,
1739
+ "learning_rate": 9.134494773519163e-07,
1740
+ "loss": 1.4806,
1741
+ "step": 5720
1742
+ },
1743
+ {
1744
+ "epoch": 63.35540838852097,
1745
+ "learning_rate": 9.131358885017421e-07,
1746
+ "loss": 1.5123,
1747
+ "step": 5740
1748
+ },
1749
+ {
1750
+ "epoch": 63.57615894039735,
1751
+ "learning_rate": 9.12822299651568e-07,
1752
+ "loss": 1.5502,
1753
+ "step": 5760
1754
+ },
1755
+ {
1756
+ "epoch": 63.796909492273734,
1757
+ "learning_rate": 9.125087108013936e-07,
1758
+ "loss": 1.4732,
1759
+ "step": 5780
1760
+ },
1761
+ {
1762
+ "epoch": 64.0176600441501,
1763
+ "learning_rate": 9.121951219512195e-07,
1764
+ "loss": 1.4694,
1765
+ "step": 5800
1766
+ },
1767
+ {
1768
+ "epoch": 64.23841059602648,
1769
+ "learning_rate": 9.118815331010452e-07,
1770
+ "loss": 1.4394,
1771
+ "step": 5820
1772
+ },
1773
+ {
1774
+ "epoch": 64.45916114790288,
1775
+ "learning_rate": 9.11567944250871e-07,
1776
+ "loss": 1.4776,
1777
+ "step": 5840
1778
+ },
1779
+ {
1780
+ "epoch": 64.67991169977925,
1781
+ "learning_rate": 9.112543554006967e-07,
1782
+ "loss": 1.5363,
1783
+ "step": 5860
1784
+ },
1785
+ {
1786
+ "epoch": 64.90066225165563,
1787
+ "learning_rate": 9.109407665505226e-07,
1788
+ "loss": 1.4584,
1789
+ "step": 5880
1790
+ },
1791
+ {
1792
+ "epoch": 65.12141280353201,
1793
+ "learning_rate": 9.106271777003485e-07,
1794
+ "loss": 1.4956,
1795
+ "step": 5900
1796
+ },
1797
+ {
1798
+ "epoch": 65.34216335540839,
1799
+ "learning_rate": 9.103135888501741e-07,
1800
+ "loss": 1.4672,
1801
+ "step": 5920
1802
+ },
1803
+ {
1804
+ "epoch": 65.56291390728477,
1805
+ "learning_rate": 9.1e-07,
1806
+ "loss": 1.4765,
1807
+ "step": 5940
1808
+ },
1809
+ {
1810
+ "epoch": 65.78366445916114,
1811
+ "learning_rate": 9.096864111498257e-07,
1812
+ "loss": 1.4821,
1813
+ "step": 5960
1814
+ },
1815
+ {
1816
+ "epoch": 66.00441501103752,
1817
+ "learning_rate": 9.093728222996516e-07,
1818
+ "loss": 1.4561,
1819
+ "step": 5980
1820
+ },
1821
+ {
1822
+ "epoch": 66.2251655629139,
1823
+ "learning_rate": 9.090592334494772e-07,
1824
+ "loss": 1.514,
1825
+ "step": 6000
1826
+ },
1827
+ {
1828
+ "epoch": 66.4459161147903,
1829
+ "learning_rate": 9.08745644599303e-07,
1830
+ "loss": 1.495,
1831
+ "step": 6020
1832
+ },
1833
+ {
1834
+ "epoch": 66.66666666666667,
1835
+ "learning_rate": 9.084320557491289e-07,
1836
+ "loss": 1.4413,
1837
+ "step": 6040
1838
+ },
1839
+ {
1840
+ "epoch": 66.88741721854305,
1841
+ "learning_rate": 9.081184668989546e-07,
1842
+ "loss": 1.4489,
1843
+ "step": 6060
1844
+ },
1845
+ {
1846
+ "epoch": 67.10816777041943,
1847
+ "learning_rate": 9.078048780487804e-07,
1848
+ "loss": 1.4897,
1849
+ "step": 6080
1850
+ },
1851
+ {
1852
+ "epoch": 67.3289183222958,
1853
+ "learning_rate": 9.074912891986062e-07,
1854
+ "loss": 1.4299,
1855
+ "step": 6100
1856
+ },
1857
+ {
1858
+ "epoch": 67.54966887417218,
1859
+ "learning_rate": 9.071777003484321e-07,
1860
+ "loss": 1.4706,
1861
+ "step": 6120
1862
+ },
1863
+ {
1864
+ "epoch": 67.77041942604856,
1865
+ "learning_rate": 9.068641114982577e-07,
1866
+ "loss": 1.5075,
1867
+ "step": 6140
1868
+ },
1869
+ {
1870
+ "epoch": 67.99116997792494,
1871
+ "learning_rate": 9.065505226480836e-07,
1872
+ "loss": 1.5227,
1873
+ "step": 6160
1874
+ },
1875
+ {
1876
+ "epoch": 68.21192052980132,
1877
+ "learning_rate": 9.062369337979094e-07,
1878
+ "loss": 1.456,
1879
+ "step": 6180
1880
+ },
1881
+ {
1882
+ "epoch": 68.43267108167771,
1883
+ "learning_rate": 9.059233449477352e-07,
1884
+ "loss": 1.4677,
1885
+ "step": 6200
1886
+ },
1887
+ {
1888
+ "epoch": 68.65342163355409,
1889
+ "learning_rate": 9.056097560975609e-07,
1890
+ "loss": 1.4993,
1891
+ "step": 6220
1892
+ },
1893
+ {
1894
+ "epoch": 68.87417218543047,
1895
+ "learning_rate": 9.052961672473867e-07,
1896
+ "loss": 1.4612,
1897
+ "step": 6240
1898
+ },
1899
+ {
1900
+ "epoch": 69.09492273730685,
1901
+ "learning_rate": 9.049825783972125e-07,
1902
+ "loss": 1.4549,
1903
+ "step": 6260
1904
+ },
1905
+ {
1906
+ "epoch": 69.31567328918322,
1907
+ "learning_rate": 9.046689895470383e-07,
1908
+ "loss": 1.5033,
1909
+ "step": 6280
1910
+ },
1911
+ {
1912
+ "epoch": 69.5364238410596,
1913
+ "learning_rate": 9.04355400696864e-07,
1914
+ "loss": 1.4558,
1915
+ "step": 6300
1916
+ },
1917
+ {
1918
+ "epoch": 69.75717439293598,
1919
+ "learning_rate": 9.040418118466899e-07,
1920
+ "loss": 1.4884,
1921
+ "step": 6320
1922
+ },
1923
+ {
1924
+ "epoch": 69.97792494481236,
1925
+ "learning_rate": 9.037282229965156e-07,
1926
+ "loss": 1.4332,
1927
+ "step": 6340
1928
+ },
1929
+ {
1930
+ "epoch": 70.19867549668874,
1931
+ "learning_rate": 9.034146341463414e-07,
1932
+ "loss": 1.455,
1933
+ "step": 6360
1934
+ },
1935
+ {
1936
+ "epoch": 70.41942604856513,
1937
+ "learning_rate": 9.031010452961671e-07,
1938
+ "loss": 1.4155,
1939
+ "step": 6380
1940
+ },
1941
+ {
1942
+ "epoch": 70.6401766004415,
1943
+ "learning_rate": 9.02787456445993e-07,
1944
+ "loss": 1.5361,
1945
+ "step": 6400
1946
+ },
1947
+ {
1948
+ "epoch": 70.86092715231788,
1949
+ "learning_rate": 9.024738675958189e-07,
1950
+ "loss": 1.4696,
1951
+ "step": 6420
1952
+ },
1953
+ {
1954
+ "epoch": 71.08167770419426,
1955
+ "learning_rate": 9.021602787456445e-07,
1956
+ "loss": 1.4936,
1957
+ "step": 6440
1958
+ },
1959
+ {
1960
+ "epoch": 71.30242825607064,
1961
+ "learning_rate": 9.018466898954704e-07,
1962
+ "loss": 1.4689,
1963
+ "step": 6460
1964
+ },
1965
+ {
1966
+ "epoch": 71.52317880794702,
1967
+ "learning_rate": 9.015331010452961e-07,
1968
+ "loss": 1.4862,
1969
+ "step": 6480
1970
+ },
1971
+ {
1972
+ "epoch": 71.7439293598234,
1973
+ "learning_rate": 9.012195121951219e-07,
1974
+ "loss": 1.463,
1975
+ "step": 6500
1976
+ },
1977
+ {
1978
+ "epoch": 71.96467991169978,
1979
+ "learning_rate": 9.009059233449477e-07,
1980
+ "loss": 1.4663,
1981
+ "step": 6520
1982
+ },
1983
+ {
1984
+ "epoch": 72.18543046357615,
1985
+ "learning_rate": 9.005923344947735e-07,
1986
+ "loss": 1.4918,
1987
+ "step": 6540
1988
+ },
1989
+ {
1990
+ "epoch": 72.40618101545253,
1991
+ "learning_rate": 9.002787456445993e-07,
1992
+ "loss": 1.474,
1993
+ "step": 6560
1994
+ },
1995
+ {
1996
+ "epoch": 72.62693156732892,
1997
+ "learning_rate": 8.99965156794425e-07,
1998
+ "loss": 1.4693,
1999
+ "step": 6580
2000
+ },
2001
+ {
2002
+ "epoch": 72.8476821192053,
2003
+ "learning_rate": 8.996515679442507e-07,
2004
+ "loss": 1.438,
2005
+ "step": 6600
2006
+ },
2007
+ {
2008
+ "epoch": 73.06843267108168,
2009
+ "learning_rate": 8.993379790940766e-07,
2010
+ "loss": 1.4405,
2011
+ "step": 6620
2012
+ },
2013
+ {
2014
+ "epoch": 73.28918322295806,
2015
+ "learning_rate": 8.990243902439025e-07,
2016
+ "loss": 1.4501,
2017
+ "step": 6640
2018
+ },
2019
+ {
2020
+ "epoch": 73.50993377483444,
2021
+ "learning_rate": 8.987108013937282e-07,
2022
+ "loss": 1.4801,
2023
+ "step": 6660
2024
+ },
2025
+ {
2026
+ "epoch": 73.73068432671081,
2027
+ "learning_rate": 8.98397212543554e-07,
2028
+ "loss": 1.4407,
2029
+ "step": 6680
2030
+ },
2031
+ {
2032
+ "epoch": 73.9514348785872,
2033
+ "learning_rate": 8.980836236933798e-07,
2034
+ "loss": 1.4393,
2035
+ "step": 6700
2036
+ },
2037
+ {
2038
+ "epoch": 74.17218543046357,
2039
+ "learning_rate": 8.977700348432056e-07,
2040
+ "loss": 1.3913,
2041
+ "step": 6720
2042
+ },
2043
+ {
2044
+ "epoch": 74.39293598233995,
2045
+ "learning_rate": 8.974564459930313e-07,
2046
+ "loss": 1.4949,
2047
+ "step": 6740
2048
+ },
2049
+ {
2050
+ "epoch": 74.61368653421634,
2051
+ "learning_rate": 8.971428571428571e-07,
2052
+ "loss": 1.4363,
2053
+ "step": 6760
2054
+ },
2055
+ {
2056
+ "epoch": 74.83443708609272,
2057
+ "learning_rate": 8.968292682926829e-07,
2058
+ "loss": 1.4655,
2059
+ "step": 6780
2060
+ },
2061
+ {
2062
+ "epoch": 75.0551876379691,
2063
+ "learning_rate": 8.965156794425087e-07,
2064
+ "loss": 1.4991,
2065
+ "step": 6800
2066
+ },
2067
+ {
2068
+ "epoch": 75.27593818984548,
2069
+ "learning_rate": 8.962020905923344e-07,
2070
+ "loss": 1.4516,
2071
+ "step": 6820
2072
+ },
2073
+ {
2074
+ "epoch": 75.49668874172185,
2075
+ "learning_rate": 8.958885017421603e-07,
2076
+ "loss": 1.5084,
2077
+ "step": 6840
2078
+ },
2079
+ {
2080
+ "epoch": 75.71743929359823,
2081
+ "learning_rate": 8.95574912891986e-07,
2082
+ "loss": 1.4542,
2083
+ "step": 6860
2084
+ },
2085
+ {
2086
+ "epoch": 75.93818984547461,
2087
+ "learning_rate": 8.952613240418118e-07,
2088
+ "loss": 1.475,
2089
+ "step": 6880
2090
+ },
2091
+ {
2092
+ "epoch": 76.15894039735099,
2093
+ "learning_rate": 8.949477351916375e-07,
2094
+ "loss": 1.4287,
2095
+ "step": 6900
2096
+ },
2097
+ {
2098
+ "epoch": 76.37969094922737,
2099
+ "learning_rate": 8.946341463414634e-07,
2100
+ "loss": 1.453,
2101
+ "step": 6920
2102
+ },
2103
+ {
2104
+ "epoch": 76.60044150110376,
2105
+ "learning_rate": 8.943205574912893e-07,
2106
+ "loss": 1.4372,
2107
+ "step": 6940
2108
+ },
2109
+ {
2110
+ "epoch": 76.82119205298014,
2111
+ "learning_rate": 8.940069686411149e-07,
2112
+ "loss": 1.4665,
2113
+ "step": 6960
2114
+ },
2115
+ {
2116
+ "epoch": 77.04194260485652,
2117
+ "learning_rate": 8.936933797909408e-07,
2118
+ "loss": 1.4841,
2119
+ "step": 6980
2120
+ },
2121
+ {
2122
+ "epoch": 77.2626931567329,
2123
+ "learning_rate": 8.933797909407665e-07,
2124
+ "loss": 1.4491,
2125
+ "step": 7000
2126
+ },
2127
+ {
2128
+ "epoch": 77.48344370860927,
2129
+ "learning_rate": 8.930662020905923e-07,
2130
+ "loss": 1.4382,
2131
+ "step": 7020
2132
+ },
2133
+ {
2134
+ "epoch": 77.70419426048565,
2135
+ "learning_rate": 8.92752613240418e-07,
2136
+ "loss": 1.4206,
2137
+ "step": 7040
2138
+ },
2139
+ {
2140
+ "epoch": 77.92494481236203,
2141
+ "learning_rate": 8.924390243902439e-07,
2142
+ "loss": 1.4521,
2143
+ "step": 7060
2144
+ },
2145
+ {
2146
+ "epoch": 78.1456953642384,
2147
+ "learning_rate": 8.921254355400697e-07,
2148
+ "loss": 1.4688,
2149
+ "step": 7080
2150
+ },
2151
+ {
2152
+ "epoch": 78.36644591611478,
2153
+ "learning_rate": 8.918118466898954e-07,
2154
+ "loss": 1.4304,
2155
+ "step": 7100
2156
+ },
2157
+ {
2158
+ "epoch": 78.58719646799118,
2159
+ "learning_rate": 8.914982578397212e-07,
2160
+ "loss": 1.4165,
2161
+ "step": 7120
2162
+ },
2163
+ {
2164
+ "epoch": 78.80794701986756,
2165
+ "learning_rate": 8.91184668989547e-07,
2166
+ "loss": 1.4728,
2167
+ "step": 7140
2168
+ },
2169
+ {
2170
+ "epoch": 79.02869757174393,
2171
+ "learning_rate": 8.908710801393728e-07,
2172
+ "loss": 1.4959,
2173
+ "step": 7160
2174
+ },
2175
+ {
2176
+ "epoch": 79.24944812362031,
2177
+ "learning_rate": 8.905574912891986e-07,
2178
+ "loss": 1.4211,
2179
+ "step": 7180
2180
+ },
2181
+ {
2182
+ "epoch": 79.47019867549669,
2183
+ "learning_rate": 8.902439024390244e-07,
2184
+ "loss": 1.4794,
2185
+ "step": 7200
2186
+ },
2187
+ {
2188
+ "epoch": 79.47019867549669,
2189
+ "eval_bleu": 44.4125,
2190
+ "eval_gen_len": 9.0667,
2191
+ "eval_loss": 2.0255990028381348,
2192
+ "eval_runtime": 3.4282,
2193
+ "eval_samples_per_second": 8.751,
2194
+ "eval_steps_per_second": 1.75,
2195
+ "step": 7200
2196
  }
2197
  ],
2198
  "logging_steps": 20,
 
2212
  "attributes": {}
2213
  }
2214
  },
2215
+ "total_flos": 2.128877387926733e+16,
2216
  "train_batch_size": 5,
2217
  "trial_name": null,
2218
  "trial_params": null