|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.739096280610677, |
|
"global_step": 21000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00015, |
|
"loss": 46.3847, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 6.927323312955705, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 16.81111717224121, |
|
"eval_validation_meteor": 0.06701502044580518, |
|
"eval_validation_runtime": 40.1943, |
|
"eval_validation_samples_per_second": 24.805, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
2501, |
|
140, |
|
21, |
|
3 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
9.256106587712805, |
|
0.5373248896564958, |
|
0.08352889702080267, |
|
0.01236603462489695 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 0.26772142473423427, |
|
"eval_validation_scarebleu_sys_len": 27020, |
|
"eval_validation_scarebleu_totals": [ |
|
27020, |
|
26055, |
|
25141, |
|
24260 |
|
], |
|
"eval_validation_steps_per_second": 0.398, |
|
"eval_validation_ter_num_edits": 24081, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 143.9476358419511, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 6.879629526302676, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 16.940214157104492, |
|
"eval_test_meteor": 0.06578125012212357, |
|
"eval_test_runtime": 43.5255, |
|
"eval_test_samples_per_second": 23.251, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
2620, |
|
181, |
|
21, |
|
1 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
8.875639418679494, |
|
0.6341087443946188, |
|
0.07605113533480606, |
|
0.0037420948246828577 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 0.20005341558923276, |
|
"eval_test_scarebleu_sys_len": 29519, |
|
"eval_test_scarebleu_totals": [ |
|
29519, |
|
28544, |
|
27613, |
|
26723 |
|
], |
|
"eval_test_steps_per_second": 0.368, |
|
"eval_test_ter_num_edits": 25915, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 149.21978464904703, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0003, |
|
"loss": 19.9672, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 6.608664701301832, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 9.209909439086914, |
|
"eval_validation_meteor": 0.06402750263419245, |
|
"eval_validation_runtime": 39.5005, |
|
"eval_validation_samples_per_second": 25.24, |
|
"eval_validation_scarebleu_bp": 0.7877829309778815, |
|
"eval_validation_scarebleu_counts": [ |
|
2144, |
|
200, |
|
35, |
|
2 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
13.677830940988835, |
|
1.352356481168436, |
|
0.2506624650862995, |
|
0.015055706112616682 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 0.4049312368014021, |
|
"eval_validation_scarebleu_sys_len": 15675, |
|
"eval_validation_scarebleu_totals": [ |
|
15675, |
|
14789, |
|
13963, |
|
13284 |
|
], |
|
"eval_validation_steps_per_second": 0.405, |
|
"eval_validation_ter_num_edits": 19406, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 116.00215195170065, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 6.872536195708606, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 9.260172843933105, |
|
"eval_test_meteor": 0.06539736281948932, |
|
"eval_test_runtime": 42.6967, |
|
"eval_test_samples_per_second": 23.702, |
|
"eval_test_scarebleu_bp": 0.8237885182366754, |
|
"eval_test_scarebleu_counts": [ |
|
2294, |
|
254, |
|
51, |
|
9 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
13.532326569136385, |
|
1.582258767831558, |
|
0.33537186821858356, |
|
0.06213324128408699 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 0.6732725214400193, |
|
"eval_test_scarebleu_sys_len": 16952, |
|
"eval_test_scarebleu_totals": [ |
|
16952, |
|
16053, |
|
15207, |
|
14485 |
|
], |
|
"eval_test_steps_per_second": 0.375, |
|
"eval_test_ter_num_edits": 20720, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 119.30673115679163, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002999986101989992, |
|
"loss": 8.7409, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 13.23574850542808, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 6.500675201416016, |
|
"eval_validation_meteor": 0.1196379582028911, |
|
"eval_validation_runtime": 40.0578, |
|
"eval_validation_samples_per_second": 24.889, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
4007, |
|
528, |
|
99, |
|
14 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
12.693635758862102, |
|
1.7262799973844243, |
|
0.334256195556756, |
|
0.04885538805136795 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 0.7734322925143389, |
|
"eval_validation_scarebleu_sys_len": 31567, |
|
"eval_validation_scarebleu_totals": [ |
|
31567, |
|
30586, |
|
29618, |
|
28656 |
|
], |
|
"eval_validation_steps_per_second": 0.399, |
|
"eval_validation_ter_num_edits": 22118, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 132.21352142985234, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 13.044816904787435, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 6.5189080238342285, |
|
"eval_test_meteor": 0.11759299622619233, |
|
"eval_test_runtime": 39.9506, |
|
"eval_test_samples_per_second": 25.331, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
4139, |
|
590, |
|
121, |
|
25 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
12.661364331599877, |
|
1.8612574529164958, |
|
0.39385456675997654, |
|
0.08403361344537816 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 0.9397643584535246, |
|
"eval_test_scarebleu_sys_len": 32690, |
|
"eval_test_scarebleu_totals": [ |
|
32690, |
|
31699, |
|
30722, |
|
29750 |
|
], |
|
"eval_test_steps_per_second": 0.4, |
|
"eval_test_ter_num_edits": 23506, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 135.34864973800887, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00029999444082175083, |
|
"loss": 5.8284, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 15.697678446835747, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 5.613649368286133, |
|
"eval_validation_meteor": 0.14151503184835326, |
|
"eval_validation_runtime": 37.324, |
|
"eval_validation_samples_per_second": 26.712, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
4311, |
|
658, |
|
141, |
|
30 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
21.276280722534793, |
|
3.413925495486147, |
|
0.7710395362826051, |
|
0.17339035949601203 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 1.7652768928036136, |
|
"eval_validation_scarebleu_sys_len": 20262, |
|
"eval_validation_scarebleu_totals": [ |
|
20262, |
|
19274, |
|
18287, |
|
17302 |
|
], |
|
"eval_validation_steps_per_second": 0.429, |
|
"eval_validation_ter_num_edits": 18183, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 108.69149381313885, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 15.824281404133039, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 5.644526481628418, |
|
"eval_test_meteor": 0.14262643610521655, |
|
"eval_test_runtime": 37.2514, |
|
"eval_test_samples_per_second": 27.167, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
4480, |
|
750, |
|
151, |
|
21 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
21.725425537073857, |
|
3.8230196758079313, |
|
0.8109995166228047, |
|
0.11913541725761616 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 1.683096735458044, |
|
"eval_test_scarebleu_sys_len": 20621, |
|
"eval_test_scarebleu_totals": [ |
|
20621, |
|
19618, |
|
18619, |
|
17627 |
|
], |
|
"eval_test_steps_per_second": 0.43, |
|
"eval_test_ter_num_edits": 18915, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 108.91345655553637, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0002999874919455162, |
|
"loss": 4.9666, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 19.94602423909563, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 5.06654691696167, |
|
"eval_validation_meteor": 0.16677714234198524, |
|
"eval_validation_runtime": 35.8549, |
|
"eval_validation_samples_per_second": 27.806, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
5239, |
|
885, |
|
180, |
|
35 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
20.370154360589446, |
|
3.579807458943451, |
|
0.7586933614330874, |
|
0.1539815222173339 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 1.70843012306825, |
|
"eval_validation_scarebleu_sys_len": 25719, |
|
"eval_validation_scarebleu_totals": [ |
|
25719, |
|
24722, |
|
23725, |
|
22730 |
|
], |
|
"eval_validation_steps_per_second": 0.446, |
|
"eval_validation_ter_num_edits": 20131, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 120.3359435710443, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 19.73223665848102, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 5.101978302001953, |
|
"eval_test_meteor": 0.16941489704708929, |
|
"eval_test_runtime": 40.2976, |
|
"eval_test_samples_per_second": 25.113, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
5479, |
|
997, |
|
226, |
|
45 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
19.717853672580702, |
|
3.723622782446312, |
|
0.8771929824561403, |
|
0.18179614592170645 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 1.8498064298140473, |
|
"eval_test_scarebleu_sys_len": 27787, |
|
"eval_test_scarebleu_totals": [ |
|
27787, |
|
26775, |
|
25764, |
|
24753 |
|
], |
|
"eval_test_steps_per_second": 0.397, |
|
"eval_test_ter_num_edits": 21806, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 125.55997005815627, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00029997776369906286, |
|
"loss": 4.4311, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 24.59127986581791, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 4.524135112762451, |
|
"eval_validation_meteor": 0.2102906308603433, |
|
"eval_validation_runtime": 31.5423, |
|
"eval_validation_samples_per_second": 31.608, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
5964, |
|
1335, |
|
396, |
|
121 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
28.030267424918925, |
|
6.5828402366863905, |
|
2.053622361665716, |
|
0.6617084108060811 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 3.979302446849127, |
|
"eval_validation_scarebleu_sys_len": 21277, |
|
"eval_validation_scarebleu_totals": [ |
|
21277, |
|
20280, |
|
19283, |
|
18286 |
|
], |
|
"eval_validation_steps_per_second": 0.507, |
|
"eval_validation_ter_num_edits": 17139, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 102.450833881284, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 24.41919902051296, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 4.5522613525390625, |
|
"eval_test_meteor": 0.2072945229916662, |
|
"eval_test_runtime": 34.2408, |
|
"eval_test_samples_per_second": 29.555, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
6159, |
|
1354, |
|
372, |
|
84 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
27.975109011627907, |
|
6.446391163587888, |
|
1.8607442977190876, |
|
0.44252449689179224 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 3.4908252929483026, |
|
"eval_test_scarebleu_sys_len": 22016, |
|
"eval_test_scarebleu_totals": [ |
|
22016, |
|
21004, |
|
19992, |
|
18982 |
|
], |
|
"eval_test_steps_per_second": 0.467, |
|
"eval_test_ter_num_edits": 17741, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 102.15350952956757, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00029996525626266166, |
|
"loss": 3.8914, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 28.001456752933812, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 4.09087610244751, |
|
"eval_validation_meteor": 0.246925162242717, |
|
"eval_validation_runtime": 25.3477, |
|
"eval_validation_samples_per_second": 39.333, |
|
"eval_validation_scarebleu_bp": 0.9367877834918877, |
|
"eval_validation_scarebleu_counts": [ |
|
6414, |
|
1774, |
|
615, |
|
220 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
35.19534679543459, |
|
10.297190619921059, |
|
3.7888122227698373, |
|
1.4439485429246521 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 6.251092400807247, |
|
"eval_validation_scarebleu_sys_len": 18224, |
|
"eval_validation_scarebleu_totals": [ |
|
18224, |
|
17228, |
|
16232, |
|
15236 |
|
], |
|
"eval_validation_steps_per_second": 0.631, |
|
"eval_validation_ter_num_edits": 14680, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 87.75180823719289, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 27.737380160414922, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 4.100297451019287, |
|
"eval_test_meteor": 0.23872126072037225, |
|
"eval_test_runtime": 27.793, |
|
"eval_test_samples_per_second": 36.412, |
|
"eval_test_scarebleu_bp": 0.9496588509867531, |
|
"eval_test_scarebleu_counts": [ |
|
6585, |
|
1745, |
|
575, |
|
175 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
34.21845770110164, |
|
9.571083808688021, |
|
3.3391405342624854, |
|
1.0796471096304523 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 5.566755396990573, |
|
"eval_test_scarebleu_sys_len": 19244, |
|
"eval_test_scarebleu_totals": [ |
|
19244, |
|
18232, |
|
17220, |
|
16209 |
|
], |
|
"eval_test_steps_per_second": 0.576, |
|
"eval_test_ter_num_edits": 15506, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 89.28427477399666, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.000299949969868084, |
|
"loss": 3.4344, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 31.29398951552832, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 3.7306714057922363, |
|
"eval_validation_meteor": 0.27820026125205044, |
|
"eval_validation_runtime": 18.376, |
|
"eval_validation_samples_per_second": 54.256, |
|
"eval_validation_scarebleu_bp": 0.9889681301158993, |
|
"eval_validation_scarebleu_counts": [ |
|
7069, |
|
2158, |
|
789, |
|
302 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
36.81579084422686, |
|
11.854537464293562, |
|
4.585343174289533, |
|
1.8630475015422578 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 7.727889909602072, |
|
"eval_validation_scarebleu_sys_len": 19201, |
|
"eval_validation_scarebleu_totals": [ |
|
19201, |
|
18204, |
|
17207, |
|
16210 |
|
], |
|
"eval_validation_steps_per_second": 0.871, |
|
"eval_validation_ter_num_edits": 14351, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 85.78516348855281, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 31.913351815650685, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 3.728149890899658, |
|
"eval_test_meteor": 0.2797376842090434, |
|
"eval_test_runtime": 22.9959, |
|
"eval_test_samples_per_second": 44.008, |
|
"eval_test_scarebleu_bp": 0.9872701879268588, |
|
"eval_test_scarebleu_counts": [ |
|
7428, |
|
2283, |
|
851, |
|
319 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
37.17345611049945, |
|
12.034791776489193, |
|
4.738835059583472, |
|
1.882450135725245 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 7.846982277886522, |
|
"eval_test_scarebleu_sys_len": 19982, |
|
"eval_test_scarebleu_totals": [ |
|
19982, |
|
18970, |
|
17958, |
|
16946 |
|
], |
|
"eval_test_steps_per_second": 0.696, |
|
"eval_test_ter_num_edits": 14894, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 85.76035008924973, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0002999319047985972, |
|
"loss": 3.1209, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 33.55090391489323, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 3.4695987701416016, |
|
"eval_validation_meteor": 0.2976359335029889, |
|
"eval_validation_runtime": 21.7643, |
|
"eval_validation_samples_per_second": 45.809, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
7463, |
|
2467, |
|
1011, |
|
418 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
37.90056370930882, |
|
13.196747619557078, |
|
5.712832683505679, |
|
2.502994011976048 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 9.196148464160226, |
|
"eval_validation_scarebleu_sys_len": 19691, |
|
"eval_validation_scarebleu_totals": [ |
|
19691, |
|
18694, |
|
17697, |
|
16700 |
|
], |
|
"eval_validation_steps_per_second": 0.735, |
|
"eval_validation_ter_num_edits": 14205, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 84.9124275210712, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 33.9716211383833, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 3.4928810596466064, |
|
"eval_test_meteor": 0.3022870093526768, |
|
"eval_test_runtime": 25.0697, |
|
"eval_test_samples_per_second": 40.367, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
7869, |
|
2633, |
|
1054, |
|
427 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
38.3741343996879, |
|
13.506720016415308, |
|
5.702846012336328, |
|
2.444190040068689 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 9.219423736514582, |
|
"eval_test_scarebleu_sys_len": 20506, |
|
"eval_test_scarebleu_totals": [ |
|
20506, |
|
19494, |
|
18482, |
|
17470 |
|
], |
|
"eval_test_steps_per_second": 0.638, |
|
"eval_test_ter_num_edits": 14739, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 84.86785282432199, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00029991106138895916, |
|
"loss": 2.9118, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 34.57697226346535, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 3.2827696800231934, |
|
"eval_validation_meteor": 0.30655372675156295, |
|
"eval_validation_runtime": 21.2242, |
|
"eval_validation_samples_per_second": 46.975, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
7614, |
|
2604, |
|
1072, |
|
445 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
39.094269870609985, |
|
14.091671627252557, |
|
6.132021507836632, |
|
2.6994237185319987 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 9.772092910936896, |
|
"eval_validation_scarebleu_sys_len": 19476, |
|
"eval_validation_scarebleu_totals": [ |
|
19476, |
|
18479, |
|
17482, |
|
16485 |
|
], |
|
"eval_validation_steps_per_second": 0.754, |
|
"eval_validation_ter_num_edits": 13943, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 83.34628489449459, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 35.254931037556965, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 3.296231985092163, |
|
"eval_test_meteor": 0.3139488805447367, |
|
"eval_test_runtime": 23.0481, |
|
"eval_test_samples_per_second": 43.908, |
|
"eval_test_scarebleu_bp": 0.9980710741204407, |
|
"eval_test_scarebleu_counts": [ |
|
8108, |
|
2823, |
|
1184, |
|
513 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
40.14060101985247, |
|
14.713086985980091, |
|
6.514442916093535, |
|
2.9889879391714733 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 10.335553726245266, |
|
"eval_test_scarebleu_sys_len": 20199, |
|
"eval_test_scarebleu_totals": [ |
|
20199, |
|
19187, |
|
18175, |
|
17163 |
|
], |
|
"eval_test_steps_per_second": 0.694, |
|
"eval_test_ter_num_edits": 14363, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 82.70282720101342, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.0002998874400254125, |
|
"loss": 2.7563, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 35.922822688896666, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 3.1191513538360596, |
|
"eval_validation_meteor": 0.3225733936623777, |
|
"eval_validation_runtime": 18.9973, |
|
"eval_validation_samples_per_second": 52.481, |
|
"eval_validation_scarebleu_bp": 0.9987114400155332, |
|
"eval_validation_scarebleu_counts": [ |
|
7893, |
|
2765, |
|
1154, |
|
506 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
40.70864923410181, |
|
15.033710308829926, |
|
6.634090255820638, |
|
3.085742163678497 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 10.56607027026474, |
|
"eval_validation_scarebleu_sys_len": 19389, |
|
"eval_validation_scarebleu_totals": [ |
|
19389, |
|
18392, |
|
17395, |
|
16398 |
|
], |
|
"eval_validation_steps_per_second": 0.842, |
|
"eval_validation_ter_num_edits": 13631, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 81.4812600872736, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 36.3162560210105, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 3.134953737258911, |
|
"eval_test_meteor": 0.3246997818548813, |
|
"eval_test_runtime": 22.6178, |
|
"eval_test_samples_per_second": 44.744, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
8246, |
|
2958, |
|
1263, |
|
563 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
40.70088845014808, |
|
15.36783042394015, |
|
6.925860934415442, |
|
3.2686948444031585 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 10.90852794539211, |
|
"eval_test_scarebleu_sys_len": 20260, |
|
"eval_test_scarebleu_totals": [ |
|
20260, |
|
19248, |
|
18236, |
|
17224 |
|
], |
|
"eval_test_steps_per_second": 0.707, |
|
"eval_test_ter_num_edits": 14265, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 82.13853860770428, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.0002998610411456772, |
|
"loss": 2.5851, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 36.76732059324029, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.992377996444702, |
|
"eval_validation_meteor": 0.3322433302283302, |
|
"eval_validation_runtime": 19.5358, |
|
"eval_validation_samples_per_second": 51.034, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8059, |
|
2918, |
|
1246, |
|
534 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
41.24782475176579, |
|
15.738093953939917, |
|
7.102143182854538, |
|
3.2269760696156635 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 11.044208276358312, |
|
"eval_validation_scarebleu_sys_len": 19538, |
|
"eval_validation_scarebleu_totals": [ |
|
19538, |
|
18541, |
|
17544, |
|
16548 |
|
], |
|
"eval_validation_steps_per_second": 0.819, |
|
"eval_validation_ter_num_edits": 13514, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 80.78187578456573, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 37.26551462594263, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 3.0072576999664307, |
|
"eval_test_meteor": 0.333414935567908, |
|
"eval_test_runtime": 23.446, |
|
"eval_test_samples_per_second": 43.163, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
8465, |
|
3091, |
|
1327, |
|
584 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
41.44836703716398, |
|
15.923960640873732, |
|
7.212348497200935, |
|
3.3588313107494105 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 11.244906644276366, |
|
"eval_test_scarebleu_sys_len": 20423, |
|
"eval_test_scarebleu_totals": [ |
|
20423, |
|
19411, |
|
18399, |
|
17387 |
|
], |
|
"eval_test_steps_per_second": 0.682, |
|
"eval_test_ter_num_edits": 14135, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 81.38999251453907, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.00029983186523894237, |
|
"loss": 2.4949, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 37.97775302751643, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.9026131629943848, |
|
"eval_validation_meteor": 0.34218163692855424, |
|
"eval_validation_runtime": 18.3627, |
|
"eval_validation_samples_per_second": 54.295, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8292, |
|
3082, |
|
1350, |
|
611 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
42.28886168910649, |
|
16.56009886626189, |
|
7.664357897127285, |
|
3.6769573328518987 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 11.852591640319904, |
|
"eval_validation_scarebleu_sys_len": 19608, |
|
"eval_validation_scarebleu_totals": [ |
|
19608, |
|
18611, |
|
17614, |
|
16617 |
|
], |
|
"eval_validation_steps_per_second": 0.871, |
|
"eval_validation_ter_num_edits": 13331, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 79.68796700340725, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 38.64688755179878, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.9025168418884277, |
|
"eval_test_meteor": 0.3488289320983426, |
|
"eval_test_runtime": 23.8956, |
|
"eval_test_samples_per_second": 42.351, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
8783, |
|
3333, |
|
1490, |
|
682 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
42.666990527082824, |
|
17.02855975067695, |
|
8.027584720650827, |
|
3.8862613254316485 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 12.270060576985161, |
|
"eval_test_scarebleu_sys_len": 20585, |
|
"eval_test_scarebleu_totals": [ |
|
20585, |
|
19573, |
|
18561, |
|
17549 |
|
], |
|
"eval_test_steps_per_second": 0.67, |
|
"eval_test_ter_num_edits": 13913, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 80.11170610928772, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0002997999128458575, |
|
"loss": 2.4153, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 38.410061864122575, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.802281141281128, |
|
"eval_validation_meteor": 0.3482320817009387, |
|
"eval_validation_runtime": 20.1679, |
|
"eval_validation_samples_per_second": 49.435, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8386, |
|
3169, |
|
1413, |
|
644 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
42.807554874936194, |
|
17.044048835583283, |
|
8.030234144123664, |
|
3.879751792276643 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 12.278811597675354, |
|
"eval_validation_scarebleu_sys_len": 19590, |
|
"eval_validation_scarebleu_totals": [ |
|
19590, |
|
18593, |
|
17596, |
|
16599 |
|
], |
|
"eval_validation_steps_per_second": 0.793, |
|
"eval_validation_ter_num_edits": 13285, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 79.41299539721442, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 39.18817112230258, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.8074586391448975, |
|
"eval_test_meteor": 0.35206418879072193, |
|
"eval_test_runtime": 22.9097, |
|
"eval_test_samples_per_second": 44.173, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
8797, |
|
3403, |
|
1547, |
|
713 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
42.6273198623831, |
|
17.34012738853503, |
|
8.311395261376457, |
|
4.0509061985114485 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 12.560056952808758, |
|
"eval_test_scarebleu_sys_len": 20637, |
|
"eval_test_scarebleu_totals": [ |
|
20637, |
|
19625, |
|
18613, |
|
17601 |
|
], |
|
"eval_test_steps_per_second": 0.698, |
|
"eval_test_ter_num_edits": 13882, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 79.93320665630218, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.000299765184558522, |
|
"loss": 2.331, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 39.13729411872558, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.7163193225860596, |
|
"eval_validation_meteor": 0.354395671848595, |
|
"eval_validation_runtime": 19.2961, |
|
"eval_validation_samples_per_second": 51.668, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8486, |
|
3263, |
|
1446, |
|
664 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
43.388894569996936, |
|
17.5798717741501, |
|
8.23274880437258, |
|
4.007967646526227 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 12.595512408378779, |
|
"eval_validation_scarebleu_sys_len": 19558, |
|
"eval_validation_scarebleu_totals": [ |
|
19558, |
|
18561, |
|
17564, |
|
16567 |
|
], |
|
"eval_validation_steps_per_second": 0.829, |
|
"eval_validation_ter_num_edits": 13087, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 78.22942196186263, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 39.693895702702456, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.730414390563965, |
|
"eval_test_meteor": 0.35872327155724143, |
|
"eval_test_runtime": 23.3127, |
|
"eval_test_samples_per_second": 43.41, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
8979, |
|
3479, |
|
1580, |
|
745 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
43.72321776392676, |
|
17.819094447859044, |
|
8.535004321521175, |
|
4.257142857142857 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 12.971193222930435, |
|
"eval_test_scarebleu_sys_len": 20536, |
|
"eval_test_scarebleu_totals": [ |
|
20536, |
|
19524, |
|
18512, |
|
17500 |
|
], |
|
"eval_test_steps_per_second": 0.686, |
|
"eval_test_ter_num_edits": 13688, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 78.81614556342488, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00029972768102047483, |
|
"loss": 2.2516, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 39.53731351221887, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.65919828414917, |
|
"eval_validation_meteor": 0.35875663369715244, |
|
"eval_validation_runtime": 18.3314, |
|
"eval_validation_samples_per_second": 54.387, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8559, |
|
3305, |
|
1499, |
|
688 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
43.7442502299908, |
|
17.79848133986752, |
|
8.530616890507625, |
|
4.150829562594269 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 12.885605055056248, |
|
"eval_validation_scarebleu_sys_len": 19566, |
|
"eval_validation_scarebleu_totals": [ |
|
19566, |
|
18569, |
|
17572, |
|
16575 |
|
], |
|
"eval_validation_steps_per_second": 0.873, |
|
"eval_validation_ter_num_edits": 13032, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 77.9006515631538, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 40.0170544194821, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.6628172397613525, |
|
"eval_test_meteor": 0.364117400692827, |
|
"eval_test_runtime": 20.9827, |
|
"eval_test_samples_per_second": 48.23, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
9039, |
|
3538, |
|
1634, |
|
772 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
44.11420204978038, |
|
18.164082554677073, |
|
8.848694898732806, |
|
4.423054887131889 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 13.307557800740135, |
|
"eval_test_scarebleu_sys_len": 20490, |
|
"eval_test_scarebleu_totals": [ |
|
20490, |
|
19478, |
|
18466, |
|
17454 |
|
], |
|
"eval_test_steps_per_second": 0.763, |
|
"eval_test_ter_num_edits": 13615, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 78.39580814187828, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.00029968740292668196, |
|
"loss": 2.213, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 40.099777569436895, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.612889051437378, |
|
"eval_validation_meteor": 0.3648782076482497, |
|
"eval_validation_runtime": 18.3903, |
|
"eval_validation_samples_per_second": 54.213, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8663, |
|
3436, |
|
1573, |
|
730 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
43.93002028397566, |
|
18.351759867542594, |
|
8.873970438903307, |
|
4.363679837408093 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 13.292372233680728, |
|
"eval_validation_scarebleu_sys_len": 19720, |
|
"eval_validation_scarebleu_totals": [ |
|
19720, |
|
18723, |
|
17726, |
|
16729 |
|
], |
|
"eval_validation_steps_per_second": 0.87, |
|
"eval_validation_ter_num_edits": 12962, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 77.48221651025166, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 40.94861131064201, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.625495195388794, |
|
"eval_test_meteor": 0.37167048896927946, |
|
"eval_test_runtime": 23.8134, |
|
"eval_test_samples_per_second": 42.497, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
9193, |
|
3711, |
|
1745, |
|
853 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
44.511693216481866, |
|
18.894149992362916, |
|
9.367115787213484, |
|
4.841914060282681 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 13.975138151336937, |
|
"eval_test_scarebleu_sys_len": 20653, |
|
"eval_test_scarebleu_totals": [ |
|
20653, |
|
19641, |
|
18629, |
|
17617 |
|
], |
|
"eval_test_steps_per_second": 0.672, |
|
"eval_test_ter_num_edits": 13520, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 77.84879368918062, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.00029964435102352384, |
|
"loss": 2.172, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 40.57571982774164, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.5481536388397217, |
|
"eval_validation_meteor": 0.3734694399709547, |
|
"eval_validation_runtime": 19.2312, |
|
"eval_validation_samples_per_second": 51.843, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8738, |
|
3503, |
|
1625, |
|
771 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
44.85165794066317, |
|
18.950500405734378, |
|
9.292086001829826, |
|
4.675277424049481 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 13.862129372426606, |
|
"eval_validation_scarebleu_sys_len": 19482, |
|
"eval_validation_scarebleu_totals": [ |
|
19482, |
|
18485, |
|
17488, |
|
16491 |
|
], |
|
"eval_validation_steps_per_second": 0.832, |
|
"eval_validation_ter_num_edits": 12785, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 76.42417359077052, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 41.3542278861197, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.5593228340148926, |
|
"eval_test_meteor": 0.3781022025701054, |
|
"eval_test_runtime": 21.5553, |
|
"eval_test_samples_per_second": 46.949, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
9240, |
|
3803, |
|
1804, |
|
862 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
45.42772861356932, |
|
19.676117549668874, |
|
9.84931207687268, |
|
4.981507165973185 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 14.471274188368865, |
|
"eval_test_scarebleu_sys_len": 20340, |
|
"eval_test_scarebleu_totals": [ |
|
20340, |
|
19328, |
|
18316, |
|
17304 |
|
], |
|
"eval_test_steps_per_second": 0.742, |
|
"eval_test_ter_num_edits": 13336, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 76.78931306500834, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.0002995985261087815, |
|
"loss": 2.1011, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 40.91683022147655, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.509413957595825, |
|
"eval_validation_meteor": 0.3741422146344069, |
|
"eval_validation_runtime": 19.6815, |
|
"eval_validation_samples_per_second": 50.657, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8797, |
|
3539, |
|
1630, |
|
777 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
44.709290506200446, |
|
18.94641040740939, |
|
9.218414206537721, |
|
4.656877434821696 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 13.809213245335025, |
|
"eval_validation_scarebleu_sys_len": 19676, |
|
"eval_validation_scarebleu_totals": [ |
|
19676, |
|
18679, |
|
17682, |
|
16685 |
|
], |
|
"eval_validation_steps_per_second": 0.813, |
|
"eval_validation_ter_num_edits": 12842, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 76.76489927670512, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 41.61549355265803, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.518157482147217, |
|
"eval_test_meteor": 0.3797284608780533, |
|
"eval_test_runtime": 19.8347, |
|
"eval_test_samples_per_second": 51.022, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
9292, |
|
3832, |
|
1832, |
|
898 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
45.2297507788162, |
|
19.61908662707352, |
|
9.892008639308855, |
|
5.129083847384053 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 14.566553770170765, |
|
"eval_test_scarebleu_sys_len": 20544, |
|
"eval_test_scarebleu_totals": [ |
|
20544, |
|
19532, |
|
18520, |
|
17508 |
|
], |
|
"eval_test_steps_per_second": 0.807, |
|
"eval_test_ter_num_edits": 13324, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 76.72021650256234, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.0002995499290316219, |
|
"loss": 2.0657, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 41.094856625774376, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.466625452041626, |
|
"eval_validation_meteor": 0.378875561890472, |
|
"eval_validation_runtime": 18.4931, |
|
"eval_validation_samples_per_second": 53.912, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8851, |
|
3584, |
|
1652, |
|
766 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
44.70653601373876, |
|
19.062815807669804, |
|
9.278813749719165, |
|
4.557624799190814 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 13.778351975109262, |
|
"eval_validation_scarebleu_sys_len": 19798, |
|
"eval_validation_scarebleu_totals": [ |
|
19798, |
|
18801, |
|
17804, |
|
16807 |
|
], |
|
"eval_validation_steps_per_second": 0.865, |
|
"eval_validation_ter_num_edits": 12843, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 76.77087692031802, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 42.01510354031067, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.4802513122558594, |
|
"eval_test_meteor": 0.3865124149670556, |
|
"eval_test_runtime": 21.1754, |
|
"eval_test_samples_per_second": 47.791, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
9418, |
|
3919, |
|
1907, |
|
937 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
45.62542389303362, |
|
19.964340295466123, |
|
10.242775808357504, |
|
5.322049301374531 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 14.927586937806508, |
|
"eval_test_scarebleu_sys_len": 20642, |
|
"eval_test_scarebleu_totals": [ |
|
20642, |
|
19630, |
|
18618, |
|
17606 |
|
], |
|
"eval_test_steps_per_second": 0.756, |
|
"eval_test_ter_num_edits": 13273, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 76.42655611216675, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00029949856069258176, |
|
"loss": 2.042, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_validation_chrf_beta": 2, |
|
"eval_validation_chrf_char_order": 6, |
|
"eval_validation_chrf_score": 41.49472756887711, |
|
"eval_validation_chrf_word_order": 2, |
|
"eval_validation_loss": 2.428771734237671, |
|
"eval_validation_meteor": 0.3799987462633123, |
|
"eval_validation_runtime": 19.422, |
|
"eval_validation_samples_per_second": 51.333, |
|
"eval_validation_scarebleu_bp": 1.0, |
|
"eval_validation_scarebleu_counts": [ |
|
8875, |
|
3637, |
|
1710, |
|
824 |
|
], |
|
"eval_validation_scarebleu_precisions": [ |
|
45.40570960810396, |
|
19.607526012183946, |
|
9.742479489516864, |
|
4.9773482331621866 |
|
], |
|
"eval_validation_scarebleu_ref_len": 19414, |
|
"eval_validation_scarebleu_score": 14.41452044509331, |
|
"eval_validation_scarebleu_sys_len": 19546, |
|
"eval_validation_scarebleu_totals": [ |
|
19546, |
|
18549, |
|
17552, |
|
16555 |
|
], |
|
"eval_validation_steps_per_second": 0.824, |
|
"eval_validation_ter_num_edits": 12677, |
|
"eval_validation_ter_ref_length": 16729.0, |
|
"eval_validation_ter_score": 75.77858808057863, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_test_chrf_beta": 2, |
|
"eval_test_chrf_char_order": 6, |
|
"eval_test_chrf_score": 42.150596446775204, |
|
"eval_test_chrf_word_order": 2, |
|
"eval_test_loss": 2.435882091522217, |
|
"eval_test_meteor": 0.3867430131575975, |
|
"eval_test_runtime": 21.5564, |
|
"eval_test_samples_per_second": 46.947, |
|
"eval_test_scarebleu_bp": 1.0, |
|
"eval_test_scarebleu_counts": [ |
|
9407, |
|
3916, |
|
1868, |
|
903 |
|
], |
|
"eval_test_scarebleu_precisions": [ |
|
45.89899975603806, |
|
20.099573987578914, |
|
10.113150343782145, |
|
5.172117532504726 |
|
], |
|
"eval_test_scarebleu_ref_len": 20238, |
|
"eval_test_scarebleu_score": 14.821297704758008, |
|
"eval_test_scarebleu_sys_len": 20495, |
|
"eval_test_scarebleu_totals": [ |
|
20495, |
|
19483, |
|
18471, |
|
17459 |
|
], |
|
"eval_test_steps_per_second": 0.742, |
|
"eval_test_ter_num_edits": 13178, |
|
"eval_test_ter_ref_length": 17367.0, |
|
"eval_test_ter_score": 75.87954165946911, |
|
"step": 21000 |
|
} |
|
], |
|
"max_steps": 731800, |
|
"num_train_epochs": 200, |
|
"total_flos": 1.4694529965249004e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|