cima_ungrounded_joint_model / trainer_state.json
ndaheim's picture
initial commit
8ad164d
raw
history blame
18.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 16.99889502762431,
"global_step": 5763,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.38,
"gpu_memory": 2825061888,
"learning_rate": 8.32e-06,
"loss": 4.6062,
"step": 128
},
{
"epoch": 0.75,
"gpu_memory": 2903643648,
"learning_rate": 1.664e-05,
"loss": 2.7746,
"step": 256
},
{
"epoch": 1.0,
"eval_bp": 0.021341648192077716,
"eval_counts": [
342,
58,
18,
6
],
"eval_loss": 2.035790205001831,
"eval_precisions": [
34.862385321100916,
8.516886930983848,
4.651162790697675,
2.3529411764705883
],
"eval_ref_len": 4755,
"eval_runtime": 35.7733,
"eval_samples_per_second": 8.386,
"eval_score": 0.16113155714674393,
"eval_steps_per_second": 8.386,
"eval_sys_len": 981,
"eval_totals": [
981,
681,
387,
255
],
"gpu_memory": 2903643648,
"step": 339
},
{
"epoch": 1.13,
"gpu_memory": 2903643648,
"learning_rate": 2.4959999999999998e-05,
"loss": 2.2201,
"step": 384
},
{
"epoch": 1.51,
"gpu_memory": 2903643648,
"learning_rate": 3.2437898089171974e-05,
"loss": 1.9599,
"step": 512
},
{
"epoch": 1.89,
"gpu_memory": 2903643648,
"learning_rate": 3.1775477707006364e-05,
"loss": 1.8228,
"step": 640
},
{
"epoch": 2.0,
"eval_bp": 0.1919535866757935,
"eval_counts": [
640,
199,
91,
36
],
"eval_loss": 1.740516185760498,
"eval_precisions": [
35.67447045707915,
13.319946452476573,
7.526881720430108,
3.896103896103896
],
"eval_ref_len": 4755,
"eval_runtime": 51.5291,
"eval_samples_per_second": 5.822,
"eval_score": 2.0855597670386987,
"eval_steps_per_second": 5.822,
"eval_sys_len": 1794,
"eval_totals": [
1794,
1494,
1209,
924
],
"gpu_memory": 2903643648,
"step": 678
},
{
"epoch": 2.27,
"gpu_memory": 2903643648,
"learning_rate": 3.111305732484076e-05,
"loss": 1.7275,
"step": 768
},
{
"epoch": 2.64,
"gpu_memory": 2903643648,
"learning_rate": 3.0450636942675155e-05,
"loss": 1.614,
"step": 896
},
{
"epoch": 3.0,
"eval_bp": 0.157930307305936,
"eval_counts": [
662,
239,
127,
66
],
"eval_loss": 1.6653738021850586,
"eval_precisions": [
39.61699581089168,
17.432530999270604,
11.598173515981735,
8.02919708029197
],
"eval_ref_len": 4755,
"eval_runtime": 46.1755,
"eval_samples_per_second": 6.497,
"eval_score": 2.515019790343611,
"eval_steps_per_second": 6.497,
"eval_sys_len": 1671,
"eval_totals": [
1671,
1371,
1095,
822
],
"gpu_memory": 2903643648,
"step": 1017
},
{
"epoch": 3.02,
"gpu_memory": 2903643648,
"learning_rate": 2.9788216560509553e-05,
"loss": 1.561,
"step": 1024
},
{
"epoch": 3.4,
"gpu_memory": 2903643648,
"learning_rate": 2.9125796178343946e-05,
"loss": 1.4029,
"step": 1152
},
{
"epoch": 3.77,
"gpu_memory": 2903643648,
"learning_rate": 2.8463375796178344e-05,
"loss": 1.4541,
"step": 1280
},
{
"epoch": 4.0,
"eval_bp": 0.06814983706797134,
"eval_counts": [
481,
162,
79,
37
],
"eval_loss": 1.6631227731704712,
"eval_precisions": [
37.286821705426355,
16.363636363636363,
10.881542699724518,
6.630824372759856
],
"eval_ref_len": 4755,
"eval_runtime": 45.6186,
"eval_samples_per_second": 6.576,
"eval_score": 0.9871612910485801,
"eval_steps_per_second": 6.576,
"eval_sys_len": 1290,
"eval_totals": [
1290,
990,
726,
558
],
"gpu_memory": 2903643648,
"step": 1356
},
{
"epoch": 4.15,
"gpu_memory": 2903643648,
"learning_rate": 2.7800955414012737e-05,
"loss": 1.4088,
"step": 1408
},
{
"epoch": 4.53,
"gpu_memory": 2903643648,
"learning_rate": 2.713853503184713e-05,
"loss": 1.3351,
"step": 1536
},
{
"epoch": 4.91,
"gpu_memory": 2903643648,
"learning_rate": 2.647611464968153e-05,
"loss": 1.3229,
"step": 1664
},
{
"epoch": 5.0,
"eval_bp": 0.23640264658354365,
"eval_counts": [
633,
216,
105,
58
],
"eval_loss": 1.6731408834457397,
"eval_precisions": [
32.5115562403698,
13.114754098360656,
7.658643326039387,
5.239385727190605
],
"eval_ref_len": 4755,
"eval_runtime": 51.3595,
"eval_samples_per_second": 5.841,
"eval_score": 2.703708498377427,
"eval_steps_per_second": 5.841,
"eval_sys_len": 1947,
"eval_totals": [
1947,
1647,
1371,
1107
],
"gpu_memory": 2903643648,
"step": 1695
},
{
"epoch": 5.29,
"gpu_memory": 2903643648,
"learning_rate": 2.5813694267515922e-05,
"loss": 1.2429,
"step": 1792
},
{
"epoch": 5.66,
"gpu_memory": 2903643648,
"learning_rate": 2.515127388535032e-05,
"loss": 1.2329,
"step": 1920
},
{
"epoch": 6.0,
"eval_bp": 0.07532276614122083,
"eval_counts": [
579,
202,
98,
55
],
"eval_loss": 1.6539884805679321,
"eval_precisions": [
43.665158371040725,
19.68810916179337,
13.01460823373174,
9.499136442141623
],
"eval_ref_len": 4755,
"eval_runtime": 46.8126,
"eval_samples_per_second": 6.409,
"eval_score": 1.3600028829560191,
"eval_steps_per_second": 6.409,
"eval_sys_len": 1326,
"eval_totals": [
1326,
1026,
753,
579
],
"gpu_memory": 2903643648,
"step": 2034
},
{
"epoch": 6.04,
"gpu_memory": 2903643648,
"learning_rate": 2.4488853503184713e-05,
"loss": 1.2504,
"step": 2048
},
{
"epoch": 6.42,
"gpu_memory": 2903643648,
"learning_rate": 2.3826433121019104e-05,
"loss": 1.1421,
"step": 2176
},
{
"epoch": 6.8,
"gpu_memory": 2903643648,
"learning_rate": 2.31640127388535e-05,
"loss": 1.1795,
"step": 2304
},
{
"epoch": 7.0,
"eval_bp": 0.17181721996808308,
"eval_counts": [
768,
262,
133,
70
],
"eval_loss": 1.667359471321106,
"eval_precisions": [
44.599303135888505,
18.424753867791843,
11.697449428320141,
8.018327605956472
],
"eval_ref_len": 4755,
"eval_runtime": 50.5053,
"eval_samples_per_second": 5.94,
"eval_score": 2.862812289607837,
"eval_steps_per_second": 5.94,
"eval_sys_len": 1722,
"eval_totals": [
1722,
1422,
1137,
873
],
"gpu_memory": 2903643648,
"step": 2373
},
{
"epoch": 7.17,
"gpu_memory": 2903643648,
"learning_rate": 2.2501592356687895e-05,
"loss": 1.0902,
"step": 2432
},
{
"epoch": 7.55,
"gpu_memory": 2903643648,
"learning_rate": 2.183917197452229e-05,
"loss": 1.0705,
"step": 2560
},
{
"epoch": 7.93,
"gpu_memory": 2903643648,
"learning_rate": 2.1176751592356686e-05,
"loss": 1.1128,
"step": 2688
},
{
"epoch": 8.0,
"eval_bp": 0.2669632643662467,
"eval_counts": [
866,
300,
163,
96
],
"eval_loss": 1.708727240562439,
"eval_precisions": [
42.26451927769644,
17.152658662092623,
11.20274914089347,
8.226221079691516
],
"eval_ref_len": 4755,
"eval_runtime": 53.4181,
"eval_samples_per_second": 5.616,
"eval_score": 4.291998839505449,
"eval_steps_per_second": 5.616,
"eval_sys_len": 2049,
"eval_totals": [
2049,
1749,
1455,
1167
],
"gpu_memory": 2903643648,
"step": 2712
},
{
"epoch": 8.31,
"gpu_memory": 2903643648,
"learning_rate": 2.051433121019108e-05,
"loss": 1.0162,
"step": 2816
},
{
"epoch": 8.68,
"gpu_memory": 2903643648,
"learning_rate": 1.9851910828025477e-05,
"loss": 1.0183,
"step": 2944
},
{
"epoch": 9.0,
"eval_bp": 0.09731210069014802,
"eval_counts": [
678,
233,
102,
45
],
"eval_loss": 1.7135441303253174,
"eval_precisions": [
47.47899159663866,
20.656028368794328,
12.23021582733813,
7.142857142857143
],
"eval_ref_len": 4755,
"eval_runtime": 50.1778,
"eval_samples_per_second": 5.979,
"eval_score": 1.664870454299152,
"eval_steps_per_second": 5.979,
"eval_sys_len": 1428,
"eval_totals": [
1428,
1128,
834,
630
],
"gpu_memory": 2903643648,
"step": 3051
},
{
"epoch": 9.06,
"gpu_memory": 2903643648,
"learning_rate": 1.918949044585987e-05,
"loss": 1.0367,
"step": 3072
},
{
"epoch": 9.44,
"gpu_memory": 2903643648,
"learning_rate": 1.8527070063694264e-05,
"loss": 0.9645,
"step": 3200
},
{
"epoch": 9.82,
"gpu_memory": 2903643648,
"learning_rate": 1.786464968152866e-05,
"loss": 0.9616,
"step": 3328
},
{
"epoch": 10.0,
"eval_bp": 0.22930577411313655,
"eval_counts": [
768,
280,
145,
80
],
"eval_loss": 1.736754298210144,
"eval_precisions": [
39.93759750390016,
17.25200246457178,
10.837070254110612,
7.428040854224698
],
"eval_ref_len": 4755,
"eval_runtime": 57.956,
"eval_samples_per_second": 5.176,
"eval_score": 3.518980787396955,
"eval_steps_per_second": 5.176,
"eval_sys_len": 1923,
"eval_totals": [
1923,
1623,
1338,
1077
],
"gpu_memory": 2903643648,
"step": 3390
},
{
"epoch": 10.19,
"gpu_memory": 2903643648,
"learning_rate": 1.7202229299363055e-05,
"loss": 0.9403,
"step": 3456
},
{
"epoch": 10.57,
"gpu_memory": 2903643648,
"learning_rate": 1.6539808917197452e-05,
"loss": 0.9059,
"step": 3584
},
{
"epoch": 10.95,
"gpu_memory": 2903643648,
"learning_rate": 1.5877388535031846e-05,
"loss": 0.9249,
"step": 3712
},
{
"epoch": 11.0,
"eval_bp": 0.1751321349922995,
"eval_counts": [
748,
240,
115,
63
],
"eval_loss": 1.782728672027588,
"eval_precisions": [
43.13725490196079,
16.736401673640167,
10.008703220191471,
7.11864406779661
],
"eval_ref_len": 4755,
"eval_runtime": 54.5903,
"eval_samples_per_second": 5.495,
"eval_score": 2.6374744638290037,
"eval_steps_per_second": 5.495,
"eval_sys_len": 1734,
"eval_totals": [
1734,
1434,
1149,
885
],
"gpu_memory": 2903643648,
"step": 3729
},
{
"epoch": 11.33,
"gpu_memory": 2903643648,
"learning_rate": 1.5214968152866242e-05,
"loss": 0.8587,
"step": 3840
},
{
"epoch": 11.7,
"gpu_memory": 2903643648,
"learning_rate": 1.4552547770700635e-05,
"loss": 0.8739,
"step": 3968
},
{
"epoch": 12.0,
"eval_bp": 0.1555153512571023,
"eval_counts": [
739,
267,
125,
60
],
"eval_loss": 1.8148356676101685,
"eval_precisions": [
44.46450060168472,
19.60352422907489,
11.671335200746965,
7.462686567164179
],
"eval_ref_len": 4755,
"eval_runtime": 53.3032,
"eval_samples_per_second": 5.628,
"eval_score": 2.581452241674501,
"eval_steps_per_second": 5.628,
"eval_sys_len": 1662,
"eval_totals": [
1662,
1362,
1071,
804
],
"gpu_memory": 2903643648,
"step": 4068
},
{
"epoch": 12.08,
"gpu_memory": 2903643648,
"learning_rate": 1.3890127388535031e-05,
"loss": 0.8413,
"step": 4096
},
{
"epoch": 12.46,
"gpu_memory": 2903643648,
"learning_rate": 1.3227707006369426e-05,
"loss": 0.8195,
"step": 4224
},
{
"epoch": 12.84,
"gpu_memory": 2903643648,
"learning_rate": 1.2565286624203822e-05,
"loss": 0.823,
"step": 4352
},
{
"epoch": 13.0,
"eval_bp": 0.2187397058134024,
"eval_counts": [
843,
326,
173,
91
],
"eval_loss": 1.8146471977233887,
"eval_precisions": [
44.67408585055644,
20.5419029615627,
13.442113442113442,
9.027777777777779
],
"eval_ref_len": 4755,
"eval_runtime": 55.2439,
"eval_samples_per_second": 5.43,
"eval_score": 3.995892671984357,
"eval_steps_per_second": 5.43,
"eval_sys_len": 1887,
"eval_totals": [
1887,
1587,
1287,
1008
],
"gpu_memory": 2903643648,
"step": 4407
},
{
"epoch": 13.22,
"gpu_memory": 2903643648,
"learning_rate": 1.1902866242038214e-05,
"loss": 0.7992,
"step": 4480
},
{
"epoch": 13.59,
"gpu_memory": 2903643648,
"learning_rate": 1.124044585987261e-05,
"loss": 0.7702,
"step": 4608
},
{
"epoch": 13.97,
"gpu_memory": 2903643648,
"learning_rate": 1.0578025477707005e-05,
"loss": 0.7824,
"step": 4736
},
{
"epoch": 14.0,
"eval_bp": 0.16524048903893263,
"eval_counts": [
719,
244,
108,
52
],
"eval_loss": 1.8748054504394531,
"eval_precisions": [
42.34393404004712,
17.453505007153076,
9.72972972972973,
6.081871345029239
],
"eval_ref_len": 4755,
"eval_runtime": 54.7238,
"eval_samples_per_second": 5.482,
"eval_score": 2.389568242739576,
"eval_steps_per_second": 5.482,
"eval_sys_len": 1698,
"eval_totals": [
1698,
1398,
1110,
855
],
"gpu_memory": 2903643648,
"step": 4746
},
{
"epoch": 14.35,
"gpu_memory": 2903643648,
"learning_rate": 9.9156050955414e-06,
"loss": 0.7425,
"step": 4864
},
{
"epoch": 14.72,
"gpu_memory": 2903643648,
"learning_rate": 9.253184713375794e-06,
"loss": 0.7501,
"step": 4992
},
{
"epoch": 15.0,
"eval_bp": 0.1953640836862138,
"eval_counts": [
762,
263,
131,
74
],
"eval_loss": 1.9026106595993042,
"eval_precisions": [
42.19269102990033,
17.46347941567065,
10.835401157981803,
7.781282860147213
],
"eval_ref_len": 4755,
"eval_runtime": 56.8759,
"eval_samples_per_second": 5.275,
"eval_score": 3.0843295492719487,
"eval_steps_per_second": 5.275,
"eval_sys_len": 1806,
"eval_totals": [
1806,
1506,
1209,
951
],
"gpu_memory": 2903643648,
"step": 5085
},
{
"epoch": 15.1,
"gpu_memory": 2903643648,
"learning_rate": 8.59076433121019e-06,
"loss": 0.7315,
"step": 5120
},
{
"epoch": 15.48,
"gpu_memory": 2903643648,
"learning_rate": 7.928343949044585e-06,
"loss": 0.7011,
"step": 5248
},
{
"epoch": 15.86,
"gpu_memory": 2903643648,
"learning_rate": 7.265923566878981e-06,
"loss": 0.7139,
"step": 5376
},
{
"epoch": 16.0,
"eval_bp": 0.23551335586741148,
"eval_counts": [
816,
277,
129,
72
],
"eval_loss": 1.9286922216415405,
"eval_precisions": [
41.97530864197531,
16.849148418491485,
9.57683741648107,
6.70391061452514
],
"eval_ref_len": 4755,
"eval_runtime": 58.3566,
"eval_samples_per_second": 5.141,
"eval_score": 3.4379225352028846,
"eval_steps_per_second": 5.141,
"eval_sys_len": 1944,
"eval_totals": [
1944,
1644,
1347,
1074
],
"gpu_memory": 2903643648,
"step": 5424
},
{
"epoch": 16.24,
"gpu_memory": 2903643648,
"learning_rate": 6.6035031847133755e-06,
"loss": 0.689,
"step": 5504
},
{
"epoch": 16.61,
"gpu_memory": 2903643648,
"learning_rate": 5.94108280254777e-06,
"loss": 0.6788,
"step": 5632
},
{
"epoch": 16.99,
"gpu_memory": 2903643648,
"learning_rate": 5.278662420382165e-06,
"loss": 0.7053,
"step": 5760
},
{
"epoch": 17.0,
"eval_bp": 0.2934278208519596,
"eval_counts": [
886,
340,
171,
99
],
"eval_loss": 1.9354726076126099,
"eval_precisions": [
41.47940074906367,
18.51851851851852,
11.089494163424124,
7.746478873239437
],
"eval_ref_len": 4755,
"eval_runtime": 60.6492,
"eval_samples_per_second": 4.946,
"eval_score": 4.702891790634525,
"eval_steps_per_second": 4.946,
"eval_sys_len": 2136,
"eval_totals": [
2136,
1836,
1542,
1278
],
"gpu_memory": 2903643648,
"step": 5763
}
],
"max_steps": 6780,
"num_train_epochs": 20,
"total_flos": 1765580040806400.0,
"trial_name": null,
"trial_params": null
}