|
{ |
|
"best_metric": 0.8748746375681541, |
|
"best_model_checkpoint": "v3dsyn8105.san-roberta-large", |
|
"epoch": 3.0, |
|
"global_step": 6126, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"eval_avg_sts": 0.831522642680213, |
|
"eval_sickr_spearman": 0.819481868688736, |
|
"eval_stsb_spearman": 0.8435634166716901, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_avg_sts": 0.8394633788185508, |
|
"eval_sickr_spearman": 0.8260827829593759, |
|
"eval_stsb_spearman": 0.8528439746777257, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_avg_sts": 0.8387226540359579, |
|
"eval_sickr_spearman": 0.8118421377561928, |
|
"eval_stsb_spearman": 0.865603170315723, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.183806725432584e-06, |
|
"loss": 0.4868, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_avg_sts": 0.8340637818765272, |
|
"eval_sickr_spearman": 0.8097645044323858, |
|
"eval_stsb_spearman": 0.8583630593206686, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_avg_sts": 0.8467054456929737, |
|
"eval_sickr_spearman": 0.8272759235502001, |
|
"eval_stsb_spearman": 0.8661349678357472, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_avg_sts": 0.8413222265544698, |
|
"eval_sickr_spearman": 0.816061670017531, |
|
"eval_stsb_spearman": 0.8665827830914085, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_avg_sts": 0.8498089066273828, |
|
"eval_sickr_spearman": 0.8340720362170844, |
|
"eval_stsb_spearman": 0.8655457770376812, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.367613450865165e-06, |
|
"loss": 0.1238, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_avg_sts": 0.8486804289849034, |
|
"eval_sickr_spearman": 0.8251996831283348, |
|
"eval_stsb_spearman": 0.8721611748414719, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_avg_sts": 0.8467220562876994, |
|
"eval_sickr_spearman": 0.8293082155142495, |
|
"eval_stsb_spearman": 0.8641358970611491, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_avg_sts": 0.8450297472977634, |
|
"eval_sickr_spearman": 0.8284341455302786, |
|
"eval_stsb_spearman": 0.8616253490652481, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_avg_sts": 0.8399309524387217, |
|
"eval_sickr_spearman": 0.817097460720044, |
|
"eval_stsb_spearman": 0.8627644441573996, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.551420176297748e-06, |
|
"loss": 0.1051, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_avg_sts": 0.8488287267471869, |
|
"eval_sickr_spearman": 0.8298005823350916, |
|
"eval_stsb_spearman": 0.8678568711592822, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_avg_sts": 0.8500693337499818, |
|
"eval_sickr_spearman": 0.832001703620696, |
|
"eval_stsb_spearman": 0.8681369638792675, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_avg_sts": 0.8539883178184261, |
|
"eval_sickr_spearman": 0.8361804094458117, |
|
"eval_stsb_spearman": 0.8717962261910405, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_avg_sts": 0.8506565279083476, |
|
"eval_sickr_spearman": 0.8323338386871405, |
|
"eval_stsb_spearman": 0.8689792171295547, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.735226901730331e-06, |
|
"loss": 0.0964, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_avg_sts": 0.8566417773406937, |
|
"eval_sickr_spearman": 0.8427579325700513, |
|
"eval_stsb_spearman": 0.8705256221113361, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_avg_sts": 0.8506860271225665, |
|
"eval_sickr_spearman": 0.8358071597565362, |
|
"eval_stsb_spearman": 0.8655648944885967, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_avg_sts": 0.8526145284643478, |
|
"eval_sickr_spearman": 0.8352194992304408, |
|
"eval_stsb_spearman": 0.8700095576982549, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_avg_sts": 0.8512789094461894, |
|
"eval_sickr_spearman": 0.8339942258327547, |
|
"eval_stsb_spearman": 0.8685635930596243, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.919033627162912e-06, |
|
"loss": 0.0724, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_avg_sts": 0.8542737971692321, |
|
"eval_sickr_spearman": 0.8376304683982283, |
|
"eval_stsb_spearman": 0.870917125940236, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_avg_sts": 0.853215393568972, |
|
"eval_sickr_spearman": 0.8382630380041683, |
|
"eval_stsb_spearman": 0.8681677491337757, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_avg_sts": 0.8512616991878239, |
|
"eval_sickr_spearman": 0.8376803727126226, |
|
"eval_stsb_spearman": 0.8648430256630253, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_avg_sts": 0.8445713819963026, |
|
"eval_sickr_spearman": 0.8245212438205214, |
|
"eval_stsb_spearman": 0.8646215201720836, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.102840352595495e-06, |
|
"loss": 0.0672, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_avg_sts": 0.8547468779877831, |
|
"eval_sickr_spearman": 0.840675015825086, |
|
"eval_stsb_spearman": 0.8688187401504804, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_avg_sts": 0.8480351385120373, |
|
"eval_sickr_spearman": 0.828842553985807, |
|
"eval_stsb_spearman": 0.8672277230382677, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_avg_sts": 0.851759527195628, |
|
"eval_sickr_spearman": 0.8318612606800909, |
|
"eval_stsb_spearman": 0.871657793711165, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_avg_sts": 0.852688164945469, |
|
"eval_sickr_spearman": 0.8305016923227837, |
|
"eval_stsb_spearman": 0.8748746375681541, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.286647078028077e-06, |
|
"loss": 0.0671, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_avg_sts": 0.8476247717318004, |
|
"eval_sickr_spearman": 0.825738544055369, |
|
"eval_stsb_spearman": 0.8695109994082318, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_avg_sts": 0.8488707460006111, |
|
"eval_sickr_spearman": 0.8274017650359679, |
|
"eval_stsb_spearman": 0.8703397269652542, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_avg_sts": 0.8474621405059352, |
|
"eval_sickr_spearman": 0.8264578578305062, |
|
"eval_stsb_spearman": 0.8684664231813641, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_avg_sts": 0.8514977368080117, |
|
"eval_sickr_spearman": 0.8317304719908749, |
|
"eval_stsb_spearman": 0.8712650016251485, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.47045380346066e-06, |
|
"loss": 0.062, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_avg_sts": 0.8495823034187644, |
|
"eval_sickr_spearman": 0.8280114718376232, |
|
"eval_stsb_spearman": 0.8711531349999054, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_avg_sts": 0.8472080808839515, |
|
"eval_sickr_spearman": 0.8245909849798095, |
|
"eval_stsb_spearman": 0.8698251767880936, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_avg_sts": 0.8502145621787748, |
|
"eval_sickr_spearman": 0.8296834344786841, |
|
"eval_stsb_spearman": 0.8707456898788655, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_avg_sts": 0.8491882134989206, |
|
"eval_sickr_spearman": 0.8280905310305905, |
|
"eval_stsb_spearman": 0.8702858959672507, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.654260528893242e-06, |
|
"loss": 0.0507, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_avg_sts": 0.8489413736886298, |
|
"eval_sickr_spearman": 0.8264288470452376, |
|
"eval_stsb_spearman": 0.8714539003320219, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_avg_sts": 0.8403852153317499, |
|
"eval_sickr_spearman": 0.8150179061521793, |
|
"eval_stsb_spearman": 0.8657525245113207, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_avg_sts": 0.8448990255323547, |
|
"eval_sickr_spearman": 0.8213330834003639, |
|
"eval_stsb_spearman": 0.8684649676643454, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_avg_sts": 0.846744031918401, |
|
"eval_sickr_spearman": 0.8236191236733109, |
|
"eval_stsb_spearman": 0.8698689401634909, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8380672543258246e-06, |
|
"loss": 0.0444, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_avg_sts": 0.8446342020355491, |
|
"eval_sickr_spearman": 0.8218428374799265, |
|
"eval_stsb_spearman": 0.8674255665911716, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_avg_sts": 0.8466793063787386, |
|
"eval_sickr_spearman": 0.8251428143042321, |
|
"eval_stsb_spearman": 0.8682157984532451, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_avg_sts": 0.8472692519799312, |
|
"eval_sickr_spearman": 0.8273726581884964, |
|
"eval_stsb_spearman": 0.8671658457713661, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_avg_sts": 0.8463050384868719, |
|
"eval_sickr_spearman": 0.8245047211216268, |
|
"eval_stsb_spearman": 0.868105355852117, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.021873979758407e-06, |
|
"loss": 0.0437, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_avg_sts": 0.845129637101806, |
|
"eval_sickr_spearman": 0.8231749320472111, |
|
"eval_stsb_spearman": 0.8670843421564008, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_avg_sts": 0.844980005172939, |
|
"eval_sickr_spearman": 0.8226033139089961, |
|
"eval_stsb_spearman": 0.8673566964368821, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_avg_sts": 0.8454315705537849, |
|
"eval_sickr_spearman": 0.8231090814071395, |
|
"eval_stsb_spearman": 0.8677540597004305, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_avg_sts": 0.8447869475256539, |
|
"eval_sickr_spearman": 0.8219029724189271, |
|
"eval_stsb_spearman": 0.8676709226323807, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.0568070519098922e-07, |
|
"loss": 0.0443, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_avg_sts": 0.8453072401863975, |
|
"eval_sickr_spearman": 0.8228684455889346, |
|
"eval_stsb_spearman": 0.8677460347838605, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_avg_sts": 0.8456332694526848, |
|
"eval_sickr_spearman": 0.8233002451908632, |
|
"eval_stsb_spearman": 0.8679662937145064, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6126, |
|
"train_runtime": 4896.9547, |
|
"train_samples_per_second": 1.251 |
|
} |
|
], |
|
"max_steps": 6126, |
|
"num_train_epochs": 3, |
|
"total_flos": 213963637016494080, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|