bge-small-matryoshka-fine-tuned
/
bge-small-hotpotwa-matryoshka-fine-tuned-50
/checkpoint-3500
/trainer_state.json
{ | |
"best_metric": 0.8931613819214387, | |
"best_model_checkpoint": "bge-small-hotpotwa-matryoshka-fine-tuned-50/checkpoint-500", | |
"epoch": 23.559108119478335, | |
"eval_steps": 50, | |
"global_step": 3500, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.33655868742111905, | |
"grad_norm": 1.7359095811843872, | |
"learning_rate": 1.3513513513513515e-06, | |
"loss": 19.5758, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.33655868742111905, | |
"eval_dim_128_cosine_accuracy": 0.9551585423568386, | |
"eval_dim_128_dot_accuracy": 0.08980123047799338, | |
"eval_dim_128_euclidean_accuracy": 0.9530288689067676, | |
"eval_dim_128_manhattan_accuracy": 0.9527922385234264, | |
"eval_dim_128_max_accuracy": 0.9551585423568386, | |
"eval_dim_256_cosine_accuracy": 0.966280170373876, | |
"eval_dim_256_dot_accuracy": 0.042711784193090394, | |
"eval_dim_256_euclidean_accuracy": 0.9659252247988642, | |
"eval_dim_256_manhattan_accuracy": 0.9634406057737813, | |
"eval_dim_256_max_accuracy": 0.966280170373876, | |
"eval_dim_384_cosine_accuracy": 0.9667534311405585, | |
"eval_dim_384_dot_accuracy": 0.03324656885944155, | |
"eval_dim_384_euclidean_accuracy": 0.9667534311405585, | |
"eval_dim_384_manhattan_accuracy": 0.9669900615238997, | |
"eval_dim_384_max_accuracy": 0.9669900615238997, | |
"eval_dim_64_cosine_accuracy": 0.9358731661145291, | |
"eval_dim_64_dot_accuracy": 0.1320397539044013, | |
"eval_dim_64_euclidean_accuracy": 0.9345716990061524, | |
"eval_dim_64_manhattan_accuracy": 0.9269995267392334, | |
"eval_dim_64_max_accuracy": 0.9358731661145291, | |
"eval_loss": 19.393272399902344, | |
"eval_runtime": 104.7788, | |
"eval_samples_per_second": 80.665, | |
"eval_sequential_score": 0.9358731661145291, | |
"eval_steps_per_second": 2.529, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.6731173748422381, | |
"grad_norm": 1.976278305053711, | |
"learning_rate": 2.702702702702703e-06, | |
"loss": 19.4573, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.6731173748422381, | |
"eval_dim_128_cosine_accuracy": 0.9570515854235684, | |
"eval_dim_128_dot_accuracy": 0.06625650733554188, | |
"eval_dim_128_euclidean_accuracy": 0.9589446284902982, | |
"eval_dim_128_manhattan_accuracy": 0.9557501183151916, | |
"eval_dim_128_max_accuracy": 0.9589446284902982, | |
"eval_dim_256_cosine_accuracy": 0.9646237576904875, | |
"eval_dim_256_dot_accuracy": 0.04046379555134879, | |
"eval_dim_256_euclidean_accuracy": 0.9650970184571699, | |
"eval_dim_256_manhattan_accuracy": 0.9632039753904401, | |
"eval_dim_256_max_accuracy": 0.9650970184571699, | |
"eval_dim_384_cosine_accuracy": 0.9653336488405111, | |
"eval_dim_384_dot_accuracy": 0.03466635115948888, | |
"eval_dim_384_euclidean_accuracy": 0.9653336488405111, | |
"eval_dim_384_manhattan_accuracy": 0.9646237576904875, | |
"eval_dim_384_max_accuracy": 0.9653336488405111, | |
"eval_dim_64_cosine_accuracy": 0.9449834358731661, | |
"eval_dim_64_dot_accuracy": 0.08932796971131093, | |
"eval_dim_64_euclidean_accuracy": 0.9461665877898722, | |
"eval_dim_64_manhattan_accuracy": 0.9420255560814008, | |
"eval_dim_64_max_accuracy": 0.9461665877898722, | |
"eval_loss": 19.097097396850586, | |
"eval_runtime": 103.9699, | |
"eval_samples_per_second": 81.293, | |
"eval_sequential_score": 0.9449834358731661, | |
"eval_steps_per_second": 2.549, | |
"step": 100 | |
}, | |
{ | |
"epoch": 1.0096760622633572, | |
"grad_norm": 2.1209616661071777, | |
"learning_rate": 4.0540540540540545e-06, | |
"loss": 19.1409, | |
"step": 150 | |
}, | |
{ | |
"epoch": 1.0096760622633572, | |
"eval_dim_128_cosine_accuracy": 0.9384761003312825, | |
"eval_dim_128_dot_accuracy": 0.06897775674396593, | |
"eval_dim_128_euclidean_accuracy": 0.9421438712730714, | |
"eval_dim_128_manhattan_accuracy": 0.939540937056318, | |
"eval_dim_128_max_accuracy": 0.9421438712730714, | |
"eval_dim_256_cosine_accuracy": 0.9434453383814482, | |
"eval_dim_256_dot_accuracy": 0.05797444391859915, | |
"eval_dim_256_euclidean_accuracy": 0.9436819687647894, | |
"eval_dim_256_manhattan_accuracy": 0.9423805016564126, | |
"eval_dim_256_max_accuracy": 0.9436819687647894, | |
"eval_dim_384_cosine_accuracy": 0.9473497397065783, | |
"eval_dim_384_dot_accuracy": 0.05265026029342167, | |
"eval_dim_384_euclidean_accuracy": 0.9473497397065783, | |
"eval_dim_384_manhattan_accuracy": 0.9458116422148604, | |
"eval_dim_384_max_accuracy": 0.9473497397065783, | |
"eval_dim_64_cosine_accuracy": 0.9306672976810223, | |
"eval_dim_64_dot_accuracy": 0.07749645054424988, | |
"eval_dim_64_euclidean_accuracy": 0.9332702318977757, | |
"eval_dim_64_manhattan_accuracy": 0.9320870799810695, | |
"eval_dim_64_max_accuracy": 0.9332702318977757, | |
"eval_loss": 18.4069766998291, | |
"eval_runtime": 103.2125, | |
"eval_samples_per_second": 81.889, | |
"eval_sequential_score": 0.9306672976810223, | |
"eval_steps_per_second": 2.568, | |
"step": 150 | |
}, | |
{ | |
"epoch": 1.3462347496844762, | |
"grad_norm": 1.658170461654663, | |
"learning_rate": 5.405405405405406e-06, | |
"loss": 18.6431, | |
"step": 200 | |
}, | |
{ | |
"epoch": 1.3462347496844762, | |
"eval_dim_128_cosine_accuracy": 0.9125650733554188, | |
"eval_dim_128_dot_accuracy": 0.08826313298627544, | |
"eval_dim_128_euclidean_accuracy": 0.9139848556554662, | |
"eval_dim_128_manhattan_accuracy": 0.9145764316138192, | |
"eval_dim_128_max_accuracy": 0.9145764316138192, | |
"eval_dim_256_cosine_accuracy": 0.9163511594888784, | |
"eval_dim_256_dot_accuracy": 0.08613345953620445, | |
"eval_dim_256_euclidean_accuracy": 0.9163511594888784, | |
"eval_dim_256_manhattan_accuracy": 0.9151680075721723, | |
"eval_dim_256_max_accuracy": 0.9163511594888784, | |
"eval_dim_384_cosine_accuracy": 0.9183625177472787, | |
"eval_dim_384_dot_accuracy": 0.08163748225272124, | |
"eval_dim_384_euclidean_accuracy": 0.9183625177472787, | |
"eval_dim_384_manhattan_accuracy": 0.9184808329389493, | |
"eval_dim_384_max_accuracy": 0.9184808329389493, | |
"eval_dim_64_cosine_accuracy": 0.9093705631803124, | |
"eval_dim_64_dot_accuracy": 0.09477046852815901, | |
"eval_dim_64_euclidean_accuracy": 0.9126833885470894, | |
"eval_dim_64_manhattan_accuracy": 0.9113819214387128, | |
"eval_dim_64_max_accuracy": 0.9126833885470894, | |
"eval_loss": 17.32919692993164, | |
"eval_runtime": 102.8811, | |
"eval_samples_per_second": 82.153, | |
"eval_sequential_score": 0.9093705631803124, | |
"eval_steps_per_second": 2.576, | |
"step": 200 | |
}, | |
{ | |
"epoch": 1.6827934371055953, | |
"grad_norm": 1.5389924049377441, | |
"learning_rate": 6.7567567567567575e-06, | |
"loss": 18.2288, | |
"step": 250 | |
}, | |
{ | |
"epoch": 1.6827934371055953, | |
"eval_dim_128_cosine_accuracy": 0.9062943681968765, | |
"eval_dim_128_dot_accuracy": 0.09311405584477046, | |
"eval_dim_128_euclidean_accuracy": 0.9062943681968765, | |
"eval_dim_128_manhattan_accuracy": 0.9062943681968765, | |
"eval_dim_128_max_accuracy": 0.9062943681968765, | |
"eval_dim_256_cosine_accuracy": 0.9071225745385707, | |
"eval_dim_256_dot_accuracy": 0.09335068622811168, | |
"eval_dim_256_euclidean_accuracy": 0.907950780880265, | |
"eval_dim_256_manhattan_accuracy": 0.9093705631803124, | |
"eval_dim_256_max_accuracy": 0.9093705631803124, | |
"eval_dim_384_cosine_accuracy": 0.9099621391386654, | |
"eval_dim_384_dot_accuracy": 0.0900378608613346, | |
"eval_dim_384_euclidean_accuracy": 0.9099621391386654, | |
"eval_dim_384_manhattan_accuracy": 0.9087789872219593, | |
"eval_dim_384_max_accuracy": 0.9099621391386654, | |
"eval_dim_64_cosine_accuracy": 0.9022716516800757, | |
"eval_dim_64_dot_accuracy": 0.09962139138665405, | |
"eval_dim_64_euclidean_accuracy": 0.9046379555134879, | |
"eval_dim_64_manhattan_accuracy": 0.9040463795551349, | |
"eval_dim_64_max_accuracy": 0.9046379555134879, | |
"eval_loss": 16.875099182128906, | |
"eval_runtime": 104.7249, | |
"eval_samples_per_second": 80.707, | |
"eval_sequential_score": 0.9022716516800757, | |
"eval_steps_per_second": 2.53, | |
"step": 250 | |
}, | |
{ | |
"epoch": 2.0193521245267143, | |
"grad_norm": 1.4371246099472046, | |
"learning_rate": 8.108108108108109e-06, | |
"loss": 18.0425, | |
"step": 300 | |
}, | |
{ | |
"epoch": 2.0193521245267143, | |
"eval_dim_128_cosine_accuracy": 0.9020350212967345, | |
"eval_dim_128_dot_accuracy": 0.09772834831992427, | |
"eval_dim_128_euclidean_accuracy": 0.9035731187884525, | |
"eval_dim_128_manhattan_accuracy": 0.9044013251301467, | |
"eval_dim_128_max_accuracy": 0.9044013251301467, | |
"eval_dim_256_cosine_accuracy": 0.9032181732134406, | |
"eval_dim_256_dot_accuracy": 0.09690014197823, | |
"eval_dim_256_euclidean_accuracy": 0.90309985802177, | |
"eval_dim_256_manhattan_accuracy": 0.9042830099384761, | |
"eval_dim_256_max_accuracy": 0.9042830099384761, | |
"eval_dim_384_cosine_accuracy": 0.9045196403218173, | |
"eval_dim_384_dot_accuracy": 0.09548035967818268, | |
"eval_dim_384_euclidean_accuracy": 0.9045196403218173, | |
"eval_dim_384_manhattan_accuracy": 0.9049929010884997, | |
"eval_dim_384_max_accuracy": 0.9049929010884997, | |
"eval_dim_64_cosine_accuracy": 0.8989588263132986, | |
"eval_dim_64_dot_accuracy": 0.10234264079507809, | |
"eval_dim_64_euclidean_accuracy": 0.9016800757217227, | |
"eval_dim_64_manhattan_accuracy": 0.9016800757217227, | |
"eval_dim_64_max_accuracy": 0.9016800757217227, | |
"eval_loss": 16.69808578491211, | |
"eval_runtime": 103.4615, | |
"eval_samples_per_second": 81.692, | |
"eval_sequential_score": 0.8989588263132986, | |
"eval_steps_per_second": 2.561, | |
"step": 300 | |
}, | |
{ | |
"epoch": 2.3559108119478336, | |
"grad_norm": 1.386720895767212, | |
"learning_rate": 9.45945945945946e-06, | |
"loss": 17.9458, | |
"step": 350 | |
}, | |
{ | |
"epoch": 2.3559108119478336, | |
"eval_dim_128_cosine_accuracy": 0.9036914339801231, | |
"eval_dim_128_dot_accuracy": 0.09761003312825367, | |
"eval_dim_128_euclidean_accuracy": 0.9034548035967819, | |
"eval_dim_128_manhattan_accuracy": 0.9016800757217227, | |
"eval_dim_128_max_accuracy": 0.9036914339801231, | |
"eval_dim_256_cosine_accuracy": 0.9013251301467108, | |
"eval_dim_256_dot_accuracy": 0.09855655466161856, | |
"eval_dim_256_euclidean_accuracy": 0.9015617605300521, | |
"eval_dim_256_manhattan_accuracy": 0.9022716516800757, | |
"eval_dim_256_max_accuracy": 0.9022716516800757, | |
"eval_dim_384_cosine_accuracy": 0.9021533364884051, | |
"eval_dim_384_dot_accuracy": 0.09784666351159489, | |
"eval_dim_384_euclidean_accuracy": 0.9021533364884051, | |
"eval_dim_384_manhattan_accuracy": 0.9039280643634643, | |
"eval_dim_384_max_accuracy": 0.9039280643634643, | |
"eval_dim_64_cosine_accuracy": 0.8983672503549456, | |
"eval_dim_64_dot_accuracy": 0.10352579271178419, | |
"eval_dim_64_euclidean_accuracy": 0.8995504022716517, | |
"eval_dim_64_manhattan_accuracy": 0.8981306199716044, | |
"eval_dim_64_max_accuracy": 0.8995504022716517, | |
"eval_loss": 16.615509033203125, | |
"eval_runtime": 103.1308, | |
"eval_samples_per_second": 81.954, | |
"eval_sequential_score": 0.8983672503549456, | |
"eval_steps_per_second": 2.57, | |
"step": 350 | |
}, | |
{ | |
"epoch": 2.6924694993689524, | |
"grad_norm": 1.4882862567901611, | |
"learning_rate": 1.0810810810810812e-05, | |
"loss": 17.8525, | |
"step": 400 | |
}, | |
{ | |
"epoch": 2.6924694993689524, | |
"eval_dim_128_cosine_accuracy": 0.8977756743965926, | |
"eval_dim_128_dot_accuracy": 0.10269758637008992, | |
"eval_dim_128_euclidean_accuracy": 0.9006152389966872, | |
"eval_dim_128_manhattan_accuracy": 0.900378608613346, | |
"eval_dim_128_max_accuracy": 0.9006152389966872, | |
"eval_dim_256_cosine_accuracy": 0.8970657832465688, | |
"eval_dim_256_dot_accuracy": 0.10269758637008992, | |
"eval_dim_256_euclidean_accuracy": 0.8980123047799338, | |
"eval_dim_256_manhattan_accuracy": 0.8971840984382394, | |
"eval_dim_256_max_accuracy": 0.8980123047799338, | |
"eval_dim_384_cosine_accuracy": 0.8974207288215806, | |
"eval_dim_384_dot_accuracy": 0.1025792711784193, | |
"eval_dim_384_euclidean_accuracy": 0.8974207288215806, | |
"eval_dim_384_manhattan_accuracy": 0.898248935163275, | |
"eval_dim_384_max_accuracy": 0.898248935163275, | |
"eval_dim_64_cosine_accuracy": 0.8948177946048272, | |
"eval_dim_64_dot_accuracy": 0.10636535731187885, | |
"eval_dim_64_euclidean_accuracy": 0.8969474680548982, | |
"eval_dim_64_manhattan_accuracy": 0.8948177946048272, | |
"eval_dim_64_max_accuracy": 0.8969474680548982, | |
"eval_loss": 16.553625106811523, | |
"eval_runtime": 103.3808, | |
"eval_samples_per_second": 81.756, | |
"eval_sequential_score": 0.8948177946048272, | |
"eval_steps_per_second": 2.563, | |
"step": 400 | |
}, | |
{ | |
"epoch": 3.0290281867900717, | |
"grad_norm": 1.5986053943634033, | |
"learning_rate": 1.2162162162162164e-05, | |
"loss": 17.7529, | |
"step": 450 | |
}, | |
{ | |
"epoch": 3.0290281867900717, | |
"eval_dim_128_cosine_accuracy": 0.8980123047799338, | |
"eval_dim_128_dot_accuracy": 0.10340747752011359, | |
"eval_dim_128_euclidean_accuracy": 0.8997870326549929, | |
"eval_dim_128_manhattan_accuracy": 0.8996687174633223, | |
"eval_dim_128_max_accuracy": 0.8997870326549929, | |
"eval_dim_256_cosine_accuracy": 0.8956460009465216, | |
"eval_dim_256_dot_accuracy": 0.10399905347846664, | |
"eval_dim_256_euclidean_accuracy": 0.8970657832465688, | |
"eval_dim_256_manhattan_accuracy": 0.8960009465215334, | |
"eval_dim_256_max_accuracy": 0.8970657832465688, | |
"eval_dim_384_cosine_accuracy": 0.8952910553715097, | |
"eval_dim_384_dot_accuracy": 0.1047089446284903, | |
"eval_dim_384_euclidean_accuracy": 0.8952910553715097, | |
"eval_dim_384_manhattan_accuracy": 0.8971840984382394, | |
"eval_dim_384_max_accuracy": 0.8971840984382394, | |
"eval_dim_64_cosine_accuracy": 0.8950544249881685, | |
"eval_dim_64_dot_accuracy": 0.10541883577851396, | |
"eval_dim_64_euclidean_accuracy": 0.8969474680548982, | |
"eval_dim_64_manhattan_accuracy": 0.8948177946048272, | |
"eval_dim_64_max_accuracy": 0.8969474680548982, | |
"eval_loss": 16.51355743408203, | |
"eval_runtime": 104.654, | |
"eval_samples_per_second": 80.761, | |
"eval_sequential_score": 0.8950544249881685, | |
"eval_steps_per_second": 2.532, | |
"step": 450 | |
}, | |
{ | |
"epoch": 3.3655868742111905, | |
"grad_norm": 1.8756661415100098, | |
"learning_rate": 1.3513513513513515e-05, | |
"loss": 17.6709, | |
"step": 500 | |
}, | |
{ | |
"epoch": 3.3655868742111905, | |
"eval_dim_128_cosine_accuracy": 0.8931613819214387, | |
"eval_dim_128_dot_accuracy": 0.10766682442025556, | |
"eval_dim_128_euclidean_accuracy": 0.8944628490298154, | |
"eval_dim_128_manhattan_accuracy": 0.8942262186464742, | |
"eval_dim_128_max_accuracy": 0.8944628490298154, | |
"eval_dim_256_cosine_accuracy": 0.8913866540463795, | |
"eval_dim_256_dot_accuracy": 0.10896829152863227, | |
"eval_dim_256_euclidean_accuracy": 0.8937529578797918, | |
"eval_dim_256_manhattan_accuracy": 0.8937529578797918, | |
"eval_dim_256_max_accuracy": 0.8937529578797918, | |
"eval_dim_384_cosine_accuracy": 0.8928064363464269, | |
"eval_dim_384_dot_accuracy": 0.10719356365357312, | |
"eval_dim_384_euclidean_accuracy": 0.8928064363464269, | |
"eval_dim_384_manhattan_accuracy": 0.8932796971131093, | |
"eval_dim_384_max_accuracy": 0.8932796971131093, | |
"eval_dim_64_cosine_accuracy": 0.8906767628963559, | |
"eval_dim_64_dot_accuracy": 0.11121628017037388, | |
"eval_dim_64_euclidean_accuracy": 0.8911500236630383, | |
"eval_dim_64_manhattan_accuracy": 0.8893752957879791, | |
"eval_dim_64_max_accuracy": 0.8911500236630383, | |
"eval_loss": 16.4824161529541, | |
"eval_runtime": 103.2754, | |
"eval_samples_per_second": 81.839, | |
"eval_sequential_score": 0.8906767628963559, | |
"eval_steps_per_second": 2.566, | |
"step": 500 | |
}, | |
{ | |
"epoch": 3.70214556163231, | |
"grad_norm": 2.3590304851531982, | |
"learning_rate": 1.4864864864864865e-05, | |
"loss": 17.5348, | |
"step": 550 | |
}, | |
{ | |
"epoch": 3.70214556163231, | |
"eval_dim_128_cosine_accuracy": 0.8862991008045433, | |
"eval_dim_128_dot_accuracy": 0.11500236630383341, | |
"eval_dim_128_euclidean_accuracy": 0.8864174159962139, | |
"eval_dim_128_manhattan_accuracy": 0.8858258400378609, | |
"eval_dim_128_max_accuracy": 0.8864174159962139, | |
"eval_dim_256_cosine_accuracy": 0.8858258400378609, | |
"eval_dim_256_dot_accuracy": 0.11358258400378608, | |
"eval_dim_256_euclidean_accuracy": 0.8867723615712257, | |
"eval_dim_256_manhattan_accuracy": 0.8858258400378609, | |
"eval_dim_256_max_accuracy": 0.8867723615712257, | |
"eval_dim_384_cosine_accuracy": 0.8859441552295315, | |
"eval_dim_384_dot_accuracy": 0.11405584477046853, | |
"eval_dim_384_euclidean_accuracy": 0.8859441552295315, | |
"eval_dim_384_manhattan_accuracy": 0.88760056791292, | |
"eval_dim_384_max_accuracy": 0.88760056791292, | |
"eval_dim_64_cosine_accuracy": 0.884879318504496, | |
"eval_dim_64_dot_accuracy": 0.11985328916232844, | |
"eval_dim_64_euclidean_accuracy": 0.8845243729294842, | |
"eval_dim_64_manhattan_accuracy": 0.8828679602460956, | |
"eval_dim_64_max_accuracy": 0.884879318504496, | |
"eval_loss": 16.463218688964844, | |
"eval_runtime": 103.2788, | |
"eval_samples_per_second": 81.837, | |
"eval_sequential_score": 0.884879318504496, | |
"eval_steps_per_second": 2.566, | |
"step": 550 | |
}, | |
{ | |
"epoch": 4.038704249053429, | |
"grad_norm": 2.6120336055755615, | |
"learning_rate": 1.6216216216216218e-05, | |
"loss": 17.4198, | |
"step": 600 | |
}, | |
{ | |
"epoch": 4.038704249053429, | |
"eval_dim_128_cosine_accuracy": 0.8852342640795078, | |
"eval_dim_128_dot_accuracy": 0.11748698532891623, | |
"eval_dim_128_euclidean_accuracy": 0.8846426881211548, | |
"eval_dim_128_manhattan_accuracy": 0.8859441552295315, | |
"eval_dim_128_max_accuracy": 0.8859441552295315, | |
"eval_dim_256_cosine_accuracy": 0.8861807856128727, | |
"eval_dim_256_dot_accuracy": 0.1137008991954567, | |
"eval_dim_256_euclidean_accuracy": 0.8871273071462376, | |
"eval_dim_256_manhattan_accuracy": 0.8866540463795551, | |
"eval_dim_256_max_accuracy": 0.8871273071462376, | |
"eval_dim_384_cosine_accuracy": 0.8859441552295315, | |
"eval_dim_384_dot_accuracy": 0.11405584477046853, | |
"eval_dim_384_euclidean_accuracy": 0.8859441552295315, | |
"eval_dim_384_manhattan_accuracy": 0.8847610033128254, | |
"eval_dim_384_max_accuracy": 0.8859441552295315, | |
"eval_dim_64_cosine_accuracy": 0.8839327969711311, | |
"eval_dim_64_dot_accuracy": 0.12103644107903455, | |
"eval_dim_64_euclidean_accuracy": 0.8861807856128727, | |
"eval_dim_64_manhattan_accuracy": 0.8857075248461902, | |
"eval_dim_64_max_accuracy": 0.8861807856128727, | |
"eval_loss": 16.46009063720703, | |
"eval_runtime": 104.1113, | |
"eval_samples_per_second": 81.182, | |
"eval_sequential_score": 0.8839327969711311, | |
"eval_steps_per_second": 2.545, | |
"step": 600 | |
}, | |
{ | |
"epoch": 4.375262936474548, | |
"grad_norm": 2.63383412361145, | |
"learning_rate": 1.756756756756757e-05, | |
"loss": 17.3673, | |
"step": 650 | |
}, | |
{ | |
"epoch": 4.375262936474548, | |
"eval_dim_128_cosine_accuracy": 0.8853525792711784, | |
"eval_dim_128_dot_accuracy": 0.1160672030288689, | |
"eval_dim_128_euclidean_accuracy": 0.8867723615712257, | |
"eval_dim_128_manhattan_accuracy": 0.8855892096545196, | |
"eval_dim_128_max_accuracy": 0.8867723615712257, | |
"eval_dim_256_cosine_accuracy": 0.8864174159962139, | |
"eval_dim_256_dot_accuracy": 0.11417415996213914, | |
"eval_dim_256_euclidean_accuracy": 0.8871273071462376, | |
"eval_dim_256_manhattan_accuracy": 0.8862991008045433, | |
"eval_dim_256_max_accuracy": 0.8871273071462376, | |
"eval_dim_384_cosine_accuracy": 0.8865357311878845, | |
"eval_dim_384_dot_accuracy": 0.11346426881211548, | |
"eval_dim_384_euclidean_accuracy": 0.8865357311878845, | |
"eval_dim_384_manhattan_accuracy": 0.8861807856128727, | |
"eval_dim_384_max_accuracy": 0.8865357311878845, | |
"eval_dim_64_cosine_accuracy": 0.8841694273544723, | |
"eval_dim_64_dot_accuracy": 0.12091812588736393, | |
"eval_dim_64_euclidean_accuracy": 0.883341221012778, | |
"eval_dim_64_manhattan_accuracy": 0.8828679602460956, | |
"eval_dim_64_max_accuracy": 0.8841694273544723, | |
"eval_loss": 16.440513610839844, | |
"eval_runtime": 102.5958, | |
"eval_samples_per_second": 82.382, | |
"eval_sequential_score": 0.8841694273544723, | |
"eval_steps_per_second": 2.583, | |
"step": 650 | |
}, | |
{ | |
"epoch": 4.711821623895667, | |
"grad_norm": 3.044569730758667, | |
"learning_rate": 1.891891891891892e-05, | |
"loss": 17.2603, | |
"step": 700 | |
}, | |
{ | |
"epoch": 4.711821623895667, | |
"eval_dim_128_cosine_accuracy": 0.8834595362044486, | |
"eval_dim_128_dot_accuracy": 0.11772361571225745, | |
"eval_dim_128_euclidean_accuracy": 0.8835778513961192, | |
"eval_dim_128_manhattan_accuracy": 0.8840511121628017, | |
"eval_dim_128_max_accuracy": 0.8840511121628017, | |
"eval_dim_256_cosine_accuracy": 0.8838144817794605, | |
"eval_dim_256_dot_accuracy": 0.11571225745385708, | |
"eval_dim_256_euclidean_accuracy": 0.8838144817794605, | |
"eval_dim_256_manhattan_accuracy": 0.8839327969711311, | |
"eval_dim_256_max_accuracy": 0.8839327969711311, | |
"eval_dim_384_cosine_accuracy": 0.8838144817794605, | |
"eval_dim_384_dot_accuracy": 0.11618551822053952, | |
"eval_dim_384_euclidean_accuracy": 0.8838144817794605, | |
"eval_dim_384_manhattan_accuracy": 0.8847610033128254, | |
"eval_dim_384_max_accuracy": 0.8847610033128254, | |
"eval_dim_64_cosine_accuracy": 0.8807382867960246, | |
"eval_dim_64_dot_accuracy": 0.12328442972077615, | |
"eval_dim_64_euclidean_accuracy": 0.8814481779460482, | |
"eval_dim_64_manhattan_accuracy": 0.8810932323710364, | |
"eval_dim_64_max_accuracy": 0.8814481779460482, | |
"eval_loss": 16.435609817504883, | |
"eval_runtime": 103.6437, | |
"eval_samples_per_second": 81.549, | |
"eval_sequential_score": 0.8807382867960246, | |
"eval_steps_per_second": 2.557, | |
"step": 700 | |
}, | |
{ | |
"epoch": 5.0483803113167856, | |
"grad_norm": 3.3264880180358887, | |
"learning_rate": 1.9999888744757143e-05, | |
"loss": 17.1807, | |
"step": 750 | |
}, | |
{ | |
"epoch": 5.0483803113167856, | |
"eval_dim_128_cosine_accuracy": 0.8849976336961666, | |
"eval_dim_128_dot_accuracy": 0.11654046379555134, | |
"eval_dim_128_euclidean_accuracy": 0.884879318504496, | |
"eval_dim_128_manhattan_accuracy": 0.8838144817794605, | |
"eval_dim_128_max_accuracy": 0.8849976336961666, | |
"eval_dim_256_cosine_accuracy": 0.8864174159962139, | |
"eval_dim_256_dot_accuracy": 0.11417415996213914, | |
"eval_dim_256_euclidean_accuracy": 0.8852342640795078, | |
"eval_dim_256_manhattan_accuracy": 0.8857075248461902, | |
"eval_dim_256_max_accuracy": 0.8864174159962139, | |
"eval_dim_384_cosine_accuracy": 0.8859441552295315, | |
"eval_dim_384_dot_accuracy": 0.11405584477046853, | |
"eval_dim_384_euclidean_accuracy": 0.8859441552295315, | |
"eval_dim_384_manhattan_accuracy": 0.8855892096545196, | |
"eval_dim_384_max_accuracy": 0.8859441552295315, | |
"eval_dim_64_cosine_accuracy": 0.8838144817794605, | |
"eval_dim_64_dot_accuracy": 0.12079981069569333, | |
"eval_dim_64_euclidean_accuracy": 0.8844060577378136, | |
"eval_dim_64_manhattan_accuracy": 0.8834595362044486, | |
"eval_dim_64_max_accuracy": 0.8844060577378136, | |
"eval_loss": 16.444347381591797, | |
"eval_runtime": 103.5226, | |
"eval_samples_per_second": 81.644, | |
"eval_sequential_score": 0.8838144817794605, | |
"eval_steps_per_second": 2.56, | |
"step": 750 | |
}, | |
{ | |
"epoch": 5.384938998737905, | |
"grad_norm": 2.7032034397125244, | |
"learning_rate": 1.999599507118322e-05, | |
"loss": 17.1629, | |
"step": 800 | |
}, | |
{ | |
"epoch": 5.384938998737905, | |
"eval_dim_128_cosine_accuracy": 0.8847610033128254, | |
"eval_dim_128_dot_accuracy": 0.11701372456223379, | |
"eval_dim_128_euclidean_accuracy": 0.8859441552295315, | |
"eval_dim_128_manhattan_accuracy": 0.884879318504496, | |
"eval_dim_128_max_accuracy": 0.8859441552295315, | |
"eval_dim_256_cosine_accuracy": 0.8861807856128727, | |
"eval_dim_256_dot_accuracy": 0.11417415996213914, | |
"eval_dim_256_euclidean_accuracy": 0.8859441552295315, | |
"eval_dim_256_manhattan_accuracy": 0.8853525792711784, | |
"eval_dim_256_max_accuracy": 0.8861807856128727, | |
"eval_dim_384_cosine_accuracy": 0.8866540463795551, | |
"eval_dim_384_dot_accuracy": 0.11334595362044486, | |
"eval_dim_384_euclidean_accuracy": 0.8866540463795551, | |
"eval_dim_384_manhattan_accuracy": 0.8862991008045433, | |
"eval_dim_384_max_accuracy": 0.8866540463795551, | |
"eval_dim_64_cosine_accuracy": 0.8841694273544723, | |
"eval_dim_64_dot_accuracy": 0.11831519167061051, | |
"eval_dim_64_euclidean_accuracy": 0.8841694273544723, | |
"eval_dim_64_manhattan_accuracy": 0.8839327969711311, | |
"eval_dim_64_max_accuracy": 0.8841694273544723, | |
"eval_loss": 16.420166015625, | |
"eval_runtime": 103.5297, | |
"eval_samples_per_second": 81.638, | |
"eval_sequential_score": 0.8841694273544723, | |
"eval_steps_per_second": 2.56, | |
"step": 800 | |
}, | |
{ | |
"epoch": 5.721497686159024, | |
"grad_norm": 3.8163998126983643, | |
"learning_rate": 1.9986541110764565e-05, | |
"loss": 17.0747, | |
"step": 850 | |
}, | |
{ | |
"epoch": 5.721497686159024, | |
"eval_dim_128_cosine_accuracy": 0.8853525792711784, | |
"eval_dim_128_dot_accuracy": 0.11618551822053952, | |
"eval_dim_128_euclidean_accuracy": 0.8835778513961192, | |
"eval_dim_128_manhattan_accuracy": 0.8845243729294842, | |
"eval_dim_128_max_accuracy": 0.8853525792711784, | |
"eval_dim_256_cosine_accuracy": 0.8874822527212494, | |
"eval_dim_256_dot_accuracy": 0.11358258400378608, | |
"eval_dim_256_euclidean_accuracy": 0.8864174159962139, | |
"eval_dim_256_manhattan_accuracy": 0.8862991008045433, | |
"eval_dim_256_max_accuracy": 0.8874822527212494, | |
"eval_dim_384_cosine_accuracy": 0.8868906767628963, | |
"eval_dim_384_dot_accuracy": 0.11310932323710364, | |
"eval_dim_384_euclidean_accuracy": 0.8868906767628963, | |
"eval_dim_384_manhattan_accuracy": 0.8862991008045433, | |
"eval_dim_384_max_accuracy": 0.8868906767628963, | |
"eval_dim_64_cosine_accuracy": 0.8836961665877898, | |
"eval_dim_64_dot_accuracy": 0.11867013724562234, | |
"eval_dim_64_euclidean_accuracy": 0.8831045906294368, | |
"eval_dim_64_manhattan_accuracy": 0.8832229058211074, | |
"eval_dim_64_max_accuracy": 0.8836961665877898, | |
"eval_loss": 16.416208267211914, | |
"eval_runtime": 103.4694, | |
"eval_samples_per_second": 81.686, | |
"eval_sequential_score": 0.8836961665877898, | |
"eval_steps_per_second": 2.561, | |
"step": 850 | |
}, | |
{ | |
"epoch": 6.058056373580143, | |
"grad_norm": 3.9848620891571045, | |
"learning_rate": 1.9971532122280466e-05, | |
"loss": 17.0161, | |
"step": 900 | |
}, | |
{ | |
"epoch": 6.058056373580143, | |
"eval_dim_128_cosine_accuracy": 0.8852342640795078, | |
"eval_dim_128_dot_accuracy": 0.11618551822053952, | |
"eval_dim_128_euclidean_accuracy": 0.8852342640795078, | |
"eval_dim_128_manhattan_accuracy": 0.8846426881211548, | |
"eval_dim_128_max_accuracy": 0.8852342640795078, | |
"eval_dim_256_cosine_accuracy": 0.8862991008045433, | |
"eval_dim_256_dot_accuracy": 0.11417415996213914, | |
"eval_dim_256_euclidean_accuracy": 0.8858258400378609, | |
"eval_dim_256_manhattan_accuracy": 0.8853525792711784, | |
"eval_dim_256_max_accuracy": 0.8862991008045433, | |
"eval_dim_384_cosine_accuracy": 0.8855892096545196, | |
"eval_dim_384_dot_accuracy": 0.11441079034548036, | |
"eval_dim_384_euclidean_accuracy": 0.8855892096545196, | |
"eval_dim_384_manhattan_accuracy": 0.885470894462849, | |
"eval_dim_384_max_accuracy": 0.8855892096545196, | |
"eval_dim_64_cosine_accuracy": 0.8855892096545196, | |
"eval_dim_64_dot_accuracy": 0.11831519167061051, | |
"eval_dim_64_euclidean_accuracy": 0.885470894462849, | |
"eval_dim_64_manhattan_accuracy": 0.8834595362044486, | |
"eval_dim_64_max_accuracy": 0.8855892096545196, | |
"eval_loss": 16.419212341308594, | |
"eval_runtime": 104.3001, | |
"eval_samples_per_second": 81.035, | |
"eval_sequential_score": 0.8855892096545196, | |
"eval_steps_per_second": 2.541, | |
"step": 900 | |
}, | |
{ | |
"epoch": 6.394615061001262, | |
"grad_norm": 4.083323001861572, | |
"learning_rate": 1.995097645450266e-05, | |
"loss": 17.0146, | |
"step": 950 | |
}, | |
{ | |
"epoch": 6.394615061001262, | |
"eval_dim_128_cosine_accuracy": 0.884879318504496, | |
"eval_dim_128_dot_accuracy": 0.1171320397539044, | |
"eval_dim_128_euclidean_accuracy": 0.8861807856128727, | |
"eval_dim_128_manhattan_accuracy": 0.8853525792711784, | |
"eval_dim_128_max_accuracy": 0.8861807856128727, | |
"eval_dim_256_cosine_accuracy": 0.8853525792711784, | |
"eval_dim_256_dot_accuracy": 0.11464742072882159, | |
"eval_dim_256_euclidean_accuracy": 0.885470894462849, | |
"eval_dim_256_manhattan_accuracy": 0.8858258400378609, | |
"eval_dim_256_max_accuracy": 0.8858258400378609, | |
"eval_dim_384_cosine_accuracy": 0.8855892096545196, | |
"eval_dim_384_dot_accuracy": 0.11441079034548036, | |
"eval_dim_384_euclidean_accuracy": 0.8855892096545196, | |
"eval_dim_384_manhattan_accuracy": 0.8864174159962139, | |
"eval_dim_384_max_accuracy": 0.8864174159962139, | |
"eval_dim_64_cosine_accuracy": 0.8844060577378136, | |
"eval_dim_64_dot_accuracy": 0.11796024609559867, | |
"eval_dim_64_euclidean_accuracy": 0.8852342640795078, | |
"eval_dim_64_manhattan_accuracy": 0.8844060577378136, | |
"eval_dim_64_max_accuracy": 0.8852342640795078, | |
"eval_loss": 16.403297424316406, | |
"eval_runtime": 102.2875, | |
"eval_samples_per_second": 82.63, | |
"eval_sequential_score": 0.8844060577378136, | |
"eval_steps_per_second": 2.591, | |
"step": 950 | |
}, | |
{ | |
"epoch": 6.731173748422381, | |
"grad_norm": 3.874021291732788, | |
"learning_rate": 1.992488554155135e-05, | |
"loss": 16.9393, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 6.731173748422381, | |
"eval_dim_128_cosine_accuracy": 0.8828679602460956, | |
"eval_dim_128_dot_accuracy": 0.11784193090392807, | |
"eval_dim_128_euclidean_accuracy": 0.8846426881211548, | |
"eval_dim_128_manhattan_accuracy": 0.8841694273544723, | |
"eval_dim_128_max_accuracy": 0.8846426881211548, | |
"eval_dim_256_cosine_accuracy": 0.8839327969711311, | |
"eval_dim_256_dot_accuracy": 0.1171320397539044, | |
"eval_dim_256_euclidean_accuracy": 0.8840511121628017, | |
"eval_dim_256_manhattan_accuracy": 0.8852342640795078, | |
"eval_dim_256_max_accuracy": 0.8852342640795078, | |
"eval_dim_384_cosine_accuracy": 0.8847610033128254, | |
"eval_dim_384_dot_accuracy": 0.11523899668717463, | |
"eval_dim_384_euclidean_accuracy": 0.8847610033128254, | |
"eval_dim_384_manhattan_accuracy": 0.8852342640795078, | |
"eval_dim_384_max_accuracy": 0.8852342640795078, | |
"eval_dim_64_cosine_accuracy": 0.8834595362044486, | |
"eval_dim_64_dot_accuracy": 0.11831519167061051, | |
"eval_dim_64_euclidean_accuracy": 0.8835778513961192, | |
"eval_dim_64_manhattan_accuracy": 0.8820397539044014, | |
"eval_dim_64_max_accuracy": 0.8835778513961192, | |
"eval_loss": 16.40532684326172, | |
"eval_runtime": 104.0121, | |
"eval_samples_per_second": 81.26, | |
"eval_sequential_score": 0.8834595362044486, | |
"eval_steps_per_second": 2.548, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 7.0677324358435, | |
"grad_norm": 4.689154148101807, | |
"learning_rate": 1.9893273896534936e-05, | |
"loss": 16.899, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 7.0677324358435, | |
"eval_dim_128_cosine_accuracy": 0.8826313298627544, | |
"eval_dim_128_dot_accuracy": 0.11867013724562234, | |
"eval_dim_128_euclidean_accuracy": 0.8823946994794132, | |
"eval_dim_128_manhattan_accuracy": 0.882158069096072, | |
"eval_dim_128_max_accuracy": 0.8826313298627544, | |
"eval_dim_256_cosine_accuracy": 0.8828679602460956, | |
"eval_dim_256_dot_accuracy": 0.11725035494557501, | |
"eval_dim_256_euclidean_accuracy": 0.8831045906294368, | |
"eval_dim_256_manhattan_accuracy": 0.8834595362044486, | |
"eval_dim_256_max_accuracy": 0.8834595362044486, | |
"eval_dim_384_cosine_accuracy": 0.883341221012778, | |
"eval_dim_384_dot_accuracy": 0.11665877898722196, | |
"eval_dim_384_euclidean_accuracy": 0.883341221012778, | |
"eval_dim_384_manhattan_accuracy": 0.8839327969711311, | |
"eval_dim_384_max_accuracy": 0.8839327969711311, | |
"eval_dim_64_cosine_accuracy": 0.88180312352106, | |
"eval_dim_64_dot_accuracy": 0.11890676762896356, | |
"eval_dim_64_euclidean_accuracy": 0.8828679602460956, | |
"eval_dim_64_manhattan_accuracy": 0.8820397539044014, | |
"eval_dim_64_max_accuracy": 0.8828679602460956, | |
"eval_loss": 16.416202545166016, | |
"eval_runtime": 104.6249, | |
"eval_samples_per_second": 80.784, | |
"eval_sequential_score": 0.88180312352106, | |
"eval_steps_per_second": 2.533, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 7.40429112326462, | |
"grad_norm": 3.6406683921813965, | |
"learning_rate": 1.9856159103477085e-05, | |
"loss": 16.9112, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 7.40429112326462, | |
"eval_dim_128_cosine_accuracy": 0.8828679602460956, | |
"eval_dim_128_dot_accuracy": 0.11878845243729295, | |
"eval_dim_128_euclidean_accuracy": 0.8828679602460956, | |
"eval_dim_128_manhattan_accuracy": 0.8828679602460956, | |
"eval_dim_128_max_accuracy": 0.8828679602460956, | |
"eval_dim_256_cosine_accuracy": 0.8834595362044486, | |
"eval_dim_256_dot_accuracy": 0.11618551822053952, | |
"eval_dim_256_euclidean_accuracy": 0.8826313298627544, | |
"eval_dim_256_manhattan_accuracy": 0.8840511121628017, | |
"eval_dim_256_max_accuracy": 0.8840511121628017, | |
"eval_dim_384_cosine_accuracy": 0.883341221012778, | |
"eval_dim_384_dot_accuracy": 0.11665877898722196, | |
"eval_dim_384_euclidean_accuracy": 0.883341221012778, | |
"eval_dim_384_manhattan_accuracy": 0.884287742546143, | |
"eval_dim_384_max_accuracy": 0.884287742546143, | |
"eval_dim_64_cosine_accuracy": 0.8820397539044014, | |
"eval_dim_64_dot_accuracy": 0.11914339801230478, | |
"eval_dim_64_euclidean_accuracy": 0.8831045906294368, | |
"eval_dim_64_manhattan_accuracy": 0.8826313298627544, | |
"eval_dim_64_max_accuracy": 0.8831045906294368, | |
"eval_loss": 16.405092239379883, | |
"eval_runtime": 101.4605, | |
"eval_samples_per_second": 83.303, | |
"eval_sequential_score": 0.8820397539044014, | |
"eval_steps_per_second": 2.612, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 7.740849810685738, | |
"grad_norm": 4.141761302947998, | |
"learning_rate": 1.9813561807535597e-05, | |
"loss": 16.8508, | |
"step": 1150 | |
}, | |
{ | |
"epoch": 7.740849810685738, | |
"eval_dim_128_cosine_accuracy": 0.882158069096072, | |
"eval_dim_128_dot_accuracy": 0.11878845243729295, | |
"eval_dim_128_euclidean_accuracy": 0.8825130146710838, | |
"eval_dim_128_manhattan_accuracy": 0.8838144817794605, | |
"eval_dim_128_max_accuracy": 0.8838144817794605, | |
"eval_dim_256_cosine_accuracy": 0.8825130146710838, | |
"eval_dim_256_dot_accuracy": 0.11748698532891623, | |
"eval_dim_256_euclidean_accuracy": 0.8831045906294368, | |
"eval_dim_256_manhattan_accuracy": 0.8835778513961192, | |
"eval_dim_256_max_accuracy": 0.8835778513961192, | |
"eval_dim_384_cosine_accuracy": 0.8829862754377662, | |
"eval_dim_384_dot_accuracy": 0.11701372456223379, | |
"eval_dim_384_euclidean_accuracy": 0.8829862754377662, | |
"eval_dim_384_manhattan_accuracy": 0.883341221012778, | |
"eval_dim_384_max_accuracy": 0.883341221012778, | |
"eval_dim_64_cosine_accuracy": 0.8820397539044014, | |
"eval_dim_64_dot_accuracy": 0.12115475627070516, | |
"eval_dim_64_euclidean_accuracy": 0.882158069096072, | |
"eval_dim_64_manhattan_accuracy": 0.882749645054425, | |
"eval_dim_64_max_accuracy": 0.882749645054425, | |
"eval_loss": 16.40436363220215, | |
"eval_runtime": 102.9818, | |
"eval_samples_per_second": 82.073, | |
"eval_sequential_score": 0.8820397539044014, | |
"eval_steps_per_second": 2.573, | |
"step": 1150 | |
}, | |
{ | |
"epoch": 8.077408498106857, | |
"grad_norm": 3.7137351036071777, | |
"learning_rate": 1.9765505703518494e-05, | |
"loss": 16.8104, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 8.077408498106857, | |
"eval_dim_128_cosine_accuracy": 0.8815664931377188, | |
"eval_dim_128_dot_accuracy": 0.119380028395646, | |
"eval_dim_128_euclidean_accuracy": 0.8813298627543776, | |
"eval_dim_128_manhattan_accuracy": 0.8820397539044014, | |
"eval_dim_128_max_accuracy": 0.8820397539044014, | |
"eval_dim_256_cosine_accuracy": 0.8815664931377188, | |
"eval_dim_256_dot_accuracy": 0.11796024609559867, | |
"eval_dim_256_euclidean_accuracy": 0.8808566019876952, | |
"eval_dim_256_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_256_max_accuracy": 0.8815664931377188, | |
"eval_dim_384_cosine_accuracy": 0.8814481779460482, | |
"eval_dim_384_dot_accuracy": 0.11855182205395173, | |
"eval_dim_384_euclidean_accuracy": 0.8814481779460482, | |
"eval_dim_384_manhattan_accuracy": 0.880619971604354, | |
"eval_dim_384_max_accuracy": 0.8814481779460482, | |
"eval_dim_64_cosine_accuracy": 0.8816848083293894, | |
"eval_dim_64_dot_accuracy": 0.12174633222905822, | |
"eval_dim_64_euclidean_accuracy": 0.880619971604354, | |
"eval_dim_64_manhattan_accuracy": 0.8809749171793658, | |
"eval_dim_64_max_accuracy": 0.8816848083293894, | |
"eval_loss": 16.40627670288086, | |
"eval_runtime": 104.9051, | |
"eval_samples_per_second": 80.568, | |
"eval_sequential_score": 0.8816848083293894, | |
"eval_steps_per_second": 2.526, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 8.413967185527977, | |
"grad_norm": 3.3535964488983154, | |
"learning_rate": 1.9712017522703764e-05, | |
"loss": 16.8212, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 8.413967185527977, | |
"eval_dim_128_cosine_accuracy": 0.8834595362044486, | |
"eval_dim_128_dot_accuracy": 0.11796024609559867, | |
"eval_dim_128_euclidean_accuracy": 0.882749645054425, | |
"eval_dim_128_manhattan_accuracy": 0.8825130146710838, | |
"eval_dim_128_max_accuracy": 0.8834595362044486, | |
"eval_dim_256_cosine_accuracy": 0.882158069096072, | |
"eval_dim_256_dot_accuracy": 0.11748698532891623, | |
"eval_dim_256_euclidean_accuracy": 0.8823946994794132, | |
"eval_dim_256_manhattan_accuracy": 0.8819214387127308, | |
"eval_dim_256_max_accuracy": 0.8823946994794132, | |
"eval_dim_384_cosine_accuracy": 0.882158069096072, | |
"eval_dim_384_dot_accuracy": 0.11784193090392807, | |
"eval_dim_384_euclidean_accuracy": 0.882158069096072, | |
"eval_dim_384_manhattan_accuracy": 0.882749645054425, | |
"eval_dim_384_max_accuracy": 0.882749645054425, | |
"eval_dim_64_cosine_accuracy": 0.8820397539044014, | |
"eval_dim_64_dot_accuracy": 0.12091812588736393, | |
"eval_dim_64_euclidean_accuracy": 0.8819214387127308, | |
"eval_dim_64_manhattan_accuracy": 0.8815664931377188, | |
"eval_dim_64_max_accuracy": 0.8820397539044014, | |
"eval_loss": 16.40399169921875, | |
"eval_runtime": 103.0829, | |
"eval_samples_per_second": 81.992, | |
"eval_sequential_score": 0.8820397539044014, | |
"eval_steps_per_second": 2.571, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 8.750525872949096, | |
"grad_norm": 4.203086853027344, | |
"learning_rate": 1.9653127017970035e-05, | |
"loss": 16.7743, | |
"step": 1300 | |
}, | |
{ | |
"epoch": 8.750525872949096, | |
"eval_dim_128_cosine_accuracy": 0.882158069096072, | |
"eval_dim_128_dot_accuracy": 0.12020823473734027, | |
"eval_dim_128_euclidean_accuracy": 0.8815664931377188, | |
"eval_dim_128_manhattan_accuracy": 0.8814481779460482, | |
"eval_dim_128_max_accuracy": 0.882158069096072, | |
"eval_dim_256_cosine_accuracy": 0.8823946994794132, | |
"eval_dim_256_dot_accuracy": 0.11878845243729295, | |
"eval_dim_256_euclidean_accuracy": 0.8819214387127308, | |
"eval_dim_256_manhattan_accuracy": 0.8816848083293894, | |
"eval_dim_256_max_accuracy": 0.8823946994794132, | |
"eval_dim_384_cosine_accuracy": 0.8816848083293894, | |
"eval_dim_384_dot_accuracy": 0.11831519167061051, | |
"eval_dim_384_euclidean_accuracy": 0.8816848083293894, | |
"eval_dim_384_manhattan_accuracy": 0.882158069096072, | |
"eval_dim_384_max_accuracy": 0.882158069096072, | |
"eval_dim_64_cosine_accuracy": 0.8809749171793658, | |
"eval_dim_64_dot_accuracy": 0.121509701845717, | |
"eval_dim_64_euclidean_accuracy": 0.8807382867960246, | |
"eval_dim_64_manhattan_accuracy": 0.881211547562707, | |
"eval_dim_64_max_accuracy": 0.881211547562707, | |
"eval_loss": 16.39342498779297, | |
"eval_runtime": 102.6649, | |
"eval_samples_per_second": 82.326, | |
"eval_sequential_score": 0.8809749171793658, | |
"eval_steps_per_second": 2.581, | |
"step": 1300 | |
}, | |
{ | |
"epoch": 9.087084560370215, | |
"grad_norm": 3.313908576965332, | |
"learning_rate": 1.9588866947246498e-05, | |
"loss": 16.7383, | |
"step": 1350 | |
}, | |
{ | |
"epoch": 9.087084560370215, | |
"eval_dim_128_cosine_accuracy": 0.8809749171793658, | |
"eval_dim_128_dot_accuracy": 0.12068149550402271, | |
"eval_dim_128_euclidean_accuracy": 0.8808566019876952, | |
"eval_dim_128_manhattan_accuracy": 0.8814481779460482, | |
"eval_dim_128_max_accuracy": 0.8814481779460482, | |
"eval_dim_256_cosine_accuracy": 0.8820397539044014, | |
"eval_dim_256_dot_accuracy": 0.11831519167061051, | |
"eval_dim_256_euclidean_accuracy": 0.8810932323710364, | |
"eval_dim_256_manhattan_accuracy": 0.881211547562707, | |
"eval_dim_256_max_accuracy": 0.8820397539044014, | |
"eval_dim_384_cosine_accuracy": 0.8807382867960246, | |
"eval_dim_384_dot_accuracy": 0.11926171320397538, | |
"eval_dim_384_euclidean_accuracy": 0.8807382867960246, | |
"eval_dim_384_manhattan_accuracy": 0.8803833412210128, | |
"eval_dim_384_max_accuracy": 0.8807382867960246, | |
"eval_dim_64_cosine_accuracy": 0.880028395646001, | |
"eval_dim_64_dot_accuracy": 0.12245622337908188, | |
"eval_dim_64_euclidean_accuracy": 0.8807382867960246, | |
"eval_dim_64_manhattan_accuracy": 0.8816848083293894, | |
"eval_dim_64_max_accuracy": 0.8816848083293894, | |
"eval_loss": 16.39626121520996, | |
"eval_runtime": 105.1167, | |
"eval_samples_per_second": 80.406, | |
"eval_sequential_score": 0.880028395646001, | |
"eval_steps_per_second": 2.521, | |
"step": 1350 | |
}, | |
{ | |
"epoch": 9.423643247791334, | |
"grad_norm": 6.617325305938721, | |
"learning_rate": 1.9519273055291266e-05, | |
"loss": 16.743, | |
"step": 1400 | |
}, | |
{ | |
"epoch": 9.423643247791334, | |
"eval_dim_128_cosine_accuracy": 0.8819214387127308, | |
"eval_dim_128_dot_accuracy": 0.119380028395646, | |
"eval_dim_128_euclidean_accuracy": 0.8826313298627544, | |
"eval_dim_128_manhattan_accuracy": 0.8815664931377188, | |
"eval_dim_128_max_accuracy": 0.8826313298627544, | |
"eval_dim_256_cosine_accuracy": 0.882158069096072, | |
"eval_dim_256_dot_accuracy": 0.11784193090392807, | |
"eval_dim_256_euclidean_accuracy": 0.882158069096072, | |
"eval_dim_256_manhattan_accuracy": 0.8816848083293894, | |
"eval_dim_256_max_accuracy": 0.882158069096072, | |
"eval_dim_384_cosine_accuracy": 0.8819214387127308, | |
"eval_dim_384_dot_accuracy": 0.11807856128726929, | |
"eval_dim_384_euclidean_accuracy": 0.8819214387127308, | |
"eval_dim_384_manhattan_accuracy": 0.8826313298627544, | |
"eval_dim_384_max_accuracy": 0.8826313298627544, | |
"eval_dim_64_cosine_accuracy": 0.8797917652626597, | |
"eval_dim_64_dot_accuracy": 0.12091812588736393, | |
"eval_dim_64_euclidean_accuracy": 0.8810932323710364, | |
"eval_dim_64_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_64_max_accuracy": 0.8810932323710364, | |
"eval_loss": 16.406700134277344, | |
"eval_runtime": 101.1577, | |
"eval_samples_per_second": 83.553, | |
"eval_sequential_score": 0.8797917652626597, | |
"eval_steps_per_second": 2.62, | |
"step": 1400 | |
}, | |
{ | |
"epoch": 9.760201935212454, | |
"grad_norm": 4.450948715209961, | |
"learning_rate": 1.944438405380829e-05, | |
"loss": 16.7047, | |
"step": 1450 | |
}, | |
{ | |
"epoch": 9.760201935212454, | |
"eval_dim_128_cosine_accuracy": 0.8803833412210128, | |
"eval_dim_128_dot_accuracy": 0.12056318031235211, | |
"eval_dim_128_euclidean_accuracy": 0.8810932323710364, | |
"eval_dim_128_manhattan_accuracy": 0.8802650260293422, | |
"eval_dim_128_max_accuracy": 0.8810932323710364, | |
"eval_dim_256_cosine_accuracy": 0.8809749171793658, | |
"eval_dim_256_dot_accuracy": 0.11914339801230478, | |
"eval_dim_256_euclidean_accuracy": 0.8813298627543776, | |
"eval_dim_256_manhattan_accuracy": 0.881211547562707, | |
"eval_dim_256_max_accuracy": 0.8813298627543776, | |
"eval_dim_384_cosine_accuracy": 0.8809749171793658, | |
"eval_dim_384_dot_accuracy": 0.11902508282063418, | |
"eval_dim_384_euclidean_accuracy": 0.8809749171793658, | |
"eval_dim_384_manhattan_accuracy": 0.8820397539044014, | |
"eval_dim_384_max_accuracy": 0.8820397539044014, | |
"eval_dim_64_cosine_accuracy": 0.8796734500709891, | |
"eval_dim_64_dot_accuracy": 0.12245622337908188, | |
"eval_dim_64_euclidean_accuracy": 0.880028395646001, | |
"eval_dim_64_manhattan_accuracy": 0.8803833412210128, | |
"eval_dim_64_max_accuracy": 0.8803833412210128, | |
"eval_loss": 16.39591407775879, | |
"eval_runtime": 102.018, | |
"eval_samples_per_second": 82.848, | |
"eval_sequential_score": 0.8796734500709891, | |
"eval_steps_per_second": 2.598, | |
"step": 1450 | |
}, | |
{ | |
"epoch": 10.096760622633571, | |
"grad_norm": 6.13853120803833, | |
"learning_rate": 1.9364241599913923e-05, | |
"loss": 16.6782, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 10.096760622633571, | |
"eval_dim_128_cosine_accuracy": 0.8788452437292948, | |
"eval_dim_128_dot_accuracy": 0.1228111689540937, | |
"eval_dim_128_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_128_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_128_max_accuracy": 0.8796734500709891, | |
"eval_dim_256_cosine_accuracy": 0.879081874112636, | |
"eval_dim_256_dot_accuracy": 0.12091812588736393, | |
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, | |
"eval_dim_256_manhattan_accuracy": 0.8803833412210128, | |
"eval_dim_256_max_accuracy": 0.8803833412210128, | |
"eval_dim_384_cosine_accuracy": 0.8795551348793185, | |
"eval_dim_384_dot_accuracy": 0.12044486512068149, | |
"eval_dim_384_euclidean_accuracy": 0.8795551348793185, | |
"eval_dim_384_manhattan_accuracy": 0.8799100804543304, | |
"eval_dim_384_max_accuracy": 0.8799100804543304, | |
"eval_dim_64_cosine_accuracy": 0.8783719829626124, | |
"eval_dim_64_dot_accuracy": 0.12363937529578797, | |
"eval_dim_64_euclidean_accuracy": 0.8795551348793185, | |
"eval_dim_64_manhattan_accuracy": 0.8781353525792712, | |
"eval_dim_64_max_accuracy": 0.8795551348793185, | |
"eval_loss": 16.398588180541992, | |
"eval_runtime": 103.6429, | |
"eval_samples_per_second": 81.549, | |
"eval_sequential_score": 0.8783719829626124, | |
"eval_steps_per_second": 2.557, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 10.43331931005469, | |
"grad_norm": 4.757913112640381, | |
"learning_rate": 1.9278890272965097e-05, | |
"loss": 16.6708, | |
"step": 1550 | |
}, | |
{ | |
"epoch": 10.43331931005469, | |
"eval_dim_128_cosine_accuracy": 0.8794368196876479, | |
"eval_dim_128_dot_accuracy": 0.121509701845717, | |
"eval_dim_128_euclidean_accuracy": 0.8795551348793185, | |
"eval_dim_128_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_128_max_accuracy": 0.8795551348793185, | |
"eval_dim_256_cosine_accuracy": 0.8792001893043067, | |
"eval_dim_256_dot_accuracy": 0.11961665877898722, | |
"eval_dim_256_euclidean_accuracy": 0.8794368196876479, | |
"eval_dim_256_manhattan_accuracy": 0.8795551348793185, | |
"eval_dim_256_max_accuracy": 0.8795551348793185, | |
"eval_dim_384_cosine_accuracy": 0.8796734500709891, | |
"eval_dim_384_dot_accuracy": 0.12032654992901089, | |
"eval_dim_384_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_384_manhattan_accuracy": 0.8809749171793658, | |
"eval_dim_384_max_accuracy": 0.8809749171793658, | |
"eval_dim_64_cosine_accuracy": 0.879081874112636, | |
"eval_dim_64_dot_accuracy": 0.12245622337908188, | |
"eval_dim_64_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_64_manhattan_accuracy": 0.8802650260293422, | |
"eval_dim_64_max_accuracy": 0.8802650260293422, | |
"eval_loss": 16.401565551757812, | |
"eval_runtime": 103.0896, | |
"eval_samples_per_second": 81.987, | |
"eval_sequential_score": 0.879081874112636, | |
"eval_steps_per_second": 2.571, | |
"step": 1550 | |
}, | |
{ | |
"epoch": 10.76987799747581, | |
"grad_norm": 5.452834129333496, | |
"learning_rate": 1.9188377549761962e-05, | |
"loss": 16.6485, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 10.76987799747581, | |
"eval_dim_128_cosine_accuracy": 0.8789635589209654, | |
"eval_dim_128_dot_accuracy": 0.1216280170373876, | |
"eval_dim_128_euclidean_accuracy": 0.8789635589209654, | |
"eval_dim_128_manhattan_accuracy": 0.8781353525792712, | |
"eval_dim_128_max_accuracy": 0.8789635589209654, | |
"eval_dim_256_cosine_accuracy": 0.8801467108376716, | |
"eval_dim_256_dot_accuracy": 0.11985328916232844, | |
"eval_dim_256_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_256_manhattan_accuracy": 0.8794368196876479, | |
"eval_dim_256_max_accuracy": 0.8801467108376716, | |
"eval_dim_384_cosine_accuracy": 0.879081874112636, | |
"eval_dim_384_dot_accuracy": 0.12091812588736393, | |
"eval_dim_384_euclidean_accuracy": 0.879081874112636, | |
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, | |
"eval_dim_384_max_accuracy": 0.8794368196876479, | |
"eval_dim_64_cosine_accuracy": 0.8781353525792712, | |
"eval_dim_64_dot_accuracy": 0.12304779933743493, | |
"eval_dim_64_euclidean_accuracy": 0.8783719829626124, | |
"eval_dim_64_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_64_max_accuracy": 0.879081874112636, | |
"eval_loss": 16.396345138549805, | |
"eval_runtime": 103.471, | |
"eval_samples_per_second": 81.685, | |
"eval_sequential_score": 0.8781353525792712, | |
"eval_steps_per_second": 2.561, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 11.106436684896929, | |
"grad_norm": 3.5591487884521484, | |
"learning_rate": 1.9092753778138885e-05, | |
"loss": 16.6205, | |
"step": 1650 | |
}, | |
{ | |
"epoch": 11.106436684896929, | |
"eval_dim_128_cosine_accuracy": 0.8778987221959299, | |
"eval_dim_128_dot_accuracy": 0.12316611452910553, | |
"eval_dim_128_euclidean_accuracy": 0.8781353525792712, | |
"eval_dim_128_manhattan_accuracy": 0.8780170373876006, | |
"eval_dim_128_max_accuracy": 0.8781353525792712, | |
"eval_dim_256_cosine_accuracy": 0.8787269285376242, | |
"eval_dim_256_dot_accuracy": 0.121509701845717, | |
"eval_dim_256_euclidean_accuracy": 0.8787269285376242, | |
"eval_dim_256_manhattan_accuracy": 0.8793185044959773, | |
"eval_dim_256_max_accuracy": 0.8793185044959773, | |
"eval_dim_384_cosine_accuracy": 0.8793185044959773, | |
"eval_dim_384_dot_accuracy": 0.12068149550402271, | |
"eval_dim_384_euclidean_accuracy": 0.8793185044959773, | |
"eval_dim_384_manhattan_accuracy": 0.8801467108376716, | |
"eval_dim_384_max_accuracy": 0.8801467108376716, | |
"eval_dim_64_cosine_accuracy": 0.8770705158542357, | |
"eval_dim_64_dot_accuracy": 0.12541410317084714, | |
"eval_dim_64_euclidean_accuracy": 0.8771888310459063, | |
"eval_dim_64_manhattan_accuracy": 0.8776620918125887, | |
"eval_dim_64_max_accuracy": 0.8776620918125887, | |
"eval_loss": 16.401174545288086, | |
"eval_runtime": 102.9169, | |
"eval_samples_per_second": 82.124, | |
"eval_sequential_score": 0.8770705158542357, | |
"eval_steps_per_second": 2.575, | |
"step": 1650 | |
}, | |
{ | |
"epoch": 11.442995372318048, | |
"grad_norm": 3.712305784225464, | |
"learning_rate": 1.8992072148958368e-05, | |
"loss": 16.6095, | |
"step": 1700 | |
}, | |
{ | |
"epoch": 11.442995372318048, | |
"eval_dim_128_cosine_accuracy": 0.8786086133459536, | |
"eval_dim_128_dot_accuracy": 0.12233790818741126, | |
"eval_dim_128_euclidean_accuracy": 0.878490298154283, | |
"eval_dim_128_manhattan_accuracy": 0.8786086133459536, | |
"eval_dim_128_max_accuracy": 0.8786086133459536, | |
"eval_dim_256_cosine_accuracy": 0.8789635589209654, | |
"eval_dim_256_dot_accuracy": 0.1216280170373876, | |
"eval_dim_256_euclidean_accuracy": 0.879081874112636, | |
"eval_dim_256_manhattan_accuracy": 0.8780170373876006, | |
"eval_dim_256_max_accuracy": 0.879081874112636, | |
"eval_dim_384_cosine_accuracy": 0.8794368196876479, | |
"eval_dim_384_dot_accuracy": 0.12056318031235211, | |
"eval_dim_384_euclidean_accuracy": 0.8794368196876479, | |
"eval_dim_384_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_384_max_accuracy": 0.8794368196876479, | |
"eval_dim_64_cosine_accuracy": 0.879081874112636, | |
"eval_dim_64_dot_accuracy": 0.12541410317084714, | |
"eval_dim_64_euclidean_accuracy": 0.8777804070042593, | |
"eval_dim_64_manhattan_accuracy": 0.8788452437292948, | |
"eval_dim_64_max_accuracy": 0.879081874112636, | |
"eval_loss": 16.413122177124023, | |
"eval_runtime": 103.5898, | |
"eval_samples_per_second": 81.591, | |
"eval_sequential_score": 0.879081874112636, | |
"eval_steps_per_second": 2.558, | |
"step": 1700 | |
}, | |
{ | |
"epoch": 11.779554059739167, | |
"grad_norm": 4.9205145835876465, | |
"learning_rate": 1.888638866652356e-05, | |
"loss": 16.5891, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 11.779554059739167, | |
"eval_dim_128_cosine_accuracy": 0.8807382867960246, | |
"eval_dim_128_dot_accuracy": 0.1194983435873166, | |
"eval_dim_128_euclidean_accuracy": 0.8805016564126834, | |
"eval_dim_128_manhattan_accuracy": 0.8792001893043067, | |
"eval_dim_128_max_accuracy": 0.8807382867960246, | |
"eval_dim_256_cosine_accuracy": 0.8805016564126834, | |
"eval_dim_256_dot_accuracy": 0.11902508282063418, | |
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, | |
"eval_dim_256_manhattan_accuracy": 0.8795551348793185, | |
"eval_dim_256_max_accuracy": 0.8805016564126834, | |
"eval_dim_384_cosine_accuracy": 0.8809749171793658, | |
"eval_dim_384_dot_accuracy": 0.11902508282063418, | |
"eval_dim_384_euclidean_accuracy": 0.8809749171793658, | |
"eval_dim_384_manhattan_accuracy": 0.880028395646001, | |
"eval_dim_384_max_accuracy": 0.8809749171793658, | |
"eval_dim_64_cosine_accuracy": 0.8801467108376716, | |
"eval_dim_64_dot_accuracy": 0.12292948414576432, | |
"eval_dim_64_euclidean_accuracy": 0.879081874112636, | |
"eval_dim_64_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_64_max_accuracy": 0.8801467108376716, | |
"eval_loss": 16.40700340270996, | |
"eval_runtime": 103.5887, | |
"eval_samples_per_second": 81.592, | |
"eval_sequential_score": 0.8801467108376716, | |
"eval_steps_per_second": 2.558, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 12.116112747160287, | |
"grad_norm": 4.849546909332275, | |
"learning_rate": 1.8775762117425777e-05, | |
"loss": 16.5619, | |
"step": 1800 | |
}, | |
{ | |
"epoch": 12.116112747160287, | |
"eval_dim_128_cosine_accuracy": 0.8794368196876479, | |
"eval_dim_128_dot_accuracy": 0.121509701845717, | |
"eval_dim_128_euclidean_accuracy": 0.8792001893043067, | |
"eval_dim_128_manhattan_accuracy": 0.8789635589209654, | |
"eval_dim_128_max_accuracy": 0.8794368196876479, | |
"eval_dim_256_cosine_accuracy": 0.880028395646001, | |
"eval_dim_256_dot_accuracy": 0.11973497397065783, | |
"eval_dim_256_euclidean_accuracy": 0.8799100804543304, | |
"eval_dim_256_manhattan_accuracy": 0.8792001893043067, | |
"eval_dim_256_max_accuracy": 0.880028395646001, | |
"eval_dim_384_cosine_accuracy": 0.8796734500709891, | |
"eval_dim_384_dot_accuracy": 0.12032654992901089, | |
"eval_dim_384_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_384_manhattan_accuracy": 0.8796734500709891, | |
"eval_dim_384_max_accuracy": 0.8796734500709891, | |
"eval_dim_64_cosine_accuracy": 0.8780170373876006, | |
"eval_dim_64_dot_accuracy": 0.12470421202082348, | |
"eval_dim_64_euclidean_accuracy": 0.8797917652626597, | |
"eval_dim_64_manhattan_accuracy": 0.8786086133459536, | |
"eval_dim_64_max_accuracy": 0.8797917652626597, | |
"eval_loss": 16.396265029907227, | |
"eval_runtime": 102.3506, | |
"eval_samples_per_second": 82.579, | |
"eval_sequential_score": 0.8780170373876006, | |
"eval_steps_per_second": 2.589, | |
"step": 1800 | |
}, | |
{ | |
"epoch": 12.452671434581404, | |
"grad_norm": 4.944924831390381, | |
"learning_rate": 1.866025403784439e-05, | |
"loss": 16.5467, | |
"step": 1850 | |
}, | |
{ | |
"epoch": 12.452671434581404, | |
"eval_dim_128_cosine_accuracy": 0.8795551348793185, | |
"eval_dim_128_dot_accuracy": 0.12316611452910553, | |
"eval_dim_128_euclidean_accuracy": 0.8787269285376242, | |
"eval_dim_128_manhattan_accuracy": 0.8794368196876479, | |
"eval_dim_128_max_accuracy": 0.8795551348793185, | |
"eval_dim_256_cosine_accuracy": 0.880619971604354, | |
"eval_dim_256_dot_accuracy": 0.12068149550402271, | |
"eval_dim_256_euclidean_accuracy": 0.8794368196876479, | |
"eval_dim_256_manhattan_accuracy": 0.8801467108376716, | |
"eval_dim_256_max_accuracy": 0.880619971604354, | |
"eval_dim_384_cosine_accuracy": 0.8803833412210128, | |
"eval_dim_384_dot_accuracy": 0.11961665877898722, | |
"eval_dim_384_euclidean_accuracy": 0.8803833412210128, | |
"eval_dim_384_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_384_max_accuracy": 0.8807382867960246, | |
"eval_dim_64_cosine_accuracy": 0.8789635589209654, | |
"eval_dim_64_dot_accuracy": 0.12470421202082348, | |
"eval_dim_64_euclidean_accuracy": 0.8781353525792712, | |
"eval_dim_64_manhattan_accuracy": 0.8796734500709891, | |
"eval_dim_64_max_accuracy": 0.8796734500709891, | |
"eval_loss": 16.399133682250977, | |
"eval_runtime": 104.1432, | |
"eval_samples_per_second": 81.157, | |
"eval_sequential_score": 0.8789635589209654, | |
"eval_steps_per_second": 2.545, | |
"step": 1850 | |
}, | |
{ | |
"epoch": 12.789230122002524, | |
"grad_norm": 6.032313346862793, | |
"learning_rate": 1.853992867931721e-05, | |
"loss": 16.5398, | |
"step": 1900 | |
}, | |
{ | |
"epoch": 12.789230122002524, | |
"eval_dim_128_cosine_accuracy": 0.8792001893043067, | |
"eval_dim_128_dot_accuracy": 0.12139138665404638, | |
"eval_dim_128_euclidean_accuracy": 0.8797917652626597, | |
"eval_dim_128_manhattan_accuracy": 0.8787269285376242, | |
"eval_dim_128_max_accuracy": 0.8797917652626597, | |
"eval_dim_256_cosine_accuracy": 0.8797917652626597, | |
"eval_dim_256_dot_accuracy": 0.11973497397065783, | |
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, | |
"eval_dim_256_manhattan_accuracy": 0.8792001893043067, | |
"eval_dim_256_max_accuracy": 0.8797917652626597, | |
"eval_dim_384_cosine_accuracy": 0.8801467108376716, | |
"eval_dim_384_dot_accuracy": 0.11985328916232844, | |
"eval_dim_384_euclidean_accuracy": 0.8801467108376716, | |
"eval_dim_384_manhattan_accuracy": 0.8805016564126834, | |
"eval_dim_384_max_accuracy": 0.8805016564126834, | |
"eval_dim_64_cosine_accuracy": 0.8788452437292948, | |
"eval_dim_64_dot_accuracy": 0.12423095125414103, | |
"eval_dim_64_euclidean_accuracy": 0.8793185044959773, | |
"eval_dim_64_manhattan_accuracy": 0.8776620918125887, | |
"eval_dim_64_max_accuracy": 0.8793185044959773, | |
"eval_loss": 16.397045135498047, | |
"eval_runtime": 103.5361, | |
"eval_samples_per_second": 81.633, | |
"eval_sequential_score": 0.8788452437292948, | |
"eval_steps_per_second": 2.559, | |
"step": 1900 | |
}, | |
{ | |
"epoch": 13.125788809423643, | |
"grad_norm": 4.27797269821167, | |
"learning_rate": 1.8414852973000503e-05, | |
"loss": 16.5047, | |
"step": 1950 | |
}, | |
{ | |
"epoch": 13.125788809423643, | |
"eval_dim_128_cosine_accuracy": 0.8795551348793185, | |
"eval_dim_128_dot_accuracy": 0.1216280170373876, | |
"eval_dim_128_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_128_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_128_max_accuracy": 0.8797917652626597, | |
"eval_dim_256_cosine_accuracy": 0.8803833412210128, | |
"eval_dim_256_dot_accuracy": 0.12068149550402271, | |
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, | |
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_256_max_accuracy": 0.8803833412210128, | |
"eval_dim_384_cosine_accuracy": 0.8803833412210128, | |
"eval_dim_384_dot_accuracy": 0.11961665877898722, | |
"eval_dim_384_euclidean_accuracy": 0.8803833412210128, | |
"eval_dim_384_manhattan_accuracy": 0.8805016564126834, | |
"eval_dim_384_max_accuracy": 0.8805016564126834, | |
"eval_dim_64_cosine_accuracy": 0.8788452437292948, | |
"eval_dim_64_dot_accuracy": 0.12588736393752958, | |
"eval_dim_64_euclidean_accuracy": 0.8793185044959773, | |
"eval_dim_64_manhattan_accuracy": 0.8802650260293422, | |
"eval_dim_64_max_accuracy": 0.8802650260293422, | |
"eval_loss": 16.396381378173828, | |
"eval_runtime": 102.672, | |
"eval_samples_per_second": 82.32, | |
"eval_sequential_score": 0.8788452437292948, | |
"eval_steps_per_second": 2.581, | |
"step": 1950 | |
}, | |
{ | |
"epoch": 13.462347496844762, | |
"grad_norm": 4.051229953765869, | |
"learning_rate": 1.8285096492438424e-05, | |
"loss": 16.4985, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 13.462347496844762, | |
"eval_dim_128_cosine_accuracy": 0.8793185044959773, | |
"eval_dim_128_dot_accuracy": 0.12127307146237577, | |
"eval_dim_128_euclidean_accuracy": 0.8803833412210128, | |
"eval_dim_128_manhattan_accuracy": 0.8796734500709891, | |
"eval_dim_128_max_accuracy": 0.8803833412210128, | |
"eval_dim_256_cosine_accuracy": 0.8797917652626597, | |
"eval_dim_256_dot_accuracy": 0.12020823473734027, | |
"eval_dim_256_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_256_max_accuracy": 0.8797917652626597, | |
"eval_dim_384_cosine_accuracy": 0.8807382867960246, | |
"eval_dim_384_dot_accuracy": 0.11926171320397538, | |
"eval_dim_384_euclidean_accuracy": 0.8807382867960246, | |
"eval_dim_384_manhattan_accuracy": 0.8810932323710364, | |
"eval_dim_384_max_accuracy": 0.8810932323710364, | |
"eval_dim_64_cosine_accuracy": 0.8789635589209654, | |
"eval_dim_64_dot_accuracy": 0.12316611452910553, | |
"eval_dim_64_euclidean_accuracy": 0.8787269285376242, | |
"eval_dim_64_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_64_max_accuracy": 0.879081874112636, | |
"eval_loss": 16.4024600982666, | |
"eval_runtime": 104.2185, | |
"eval_samples_per_second": 81.099, | |
"eval_sequential_score": 0.8789635589209654, | |
"eval_steps_per_second": 2.543, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 13.798906184265881, | |
"grad_norm": 4.3837666511535645, | |
"learning_rate": 1.8150731414862623e-05, | |
"loss": 16.4852, | |
"step": 2050 | |
}, | |
{ | |
"epoch": 13.798906184265881, | |
"eval_dim_128_cosine_accuracy": 0.8801467108376716, | |
"eval_dim_128_dot_accuracy": 0.12032654992901089, | |
"eval_dim_128_euclidean_accuracy": 0.8805016564126834, | |
"eval_dim_128_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_128_max_accuracy": 0.8805016564126834, | |
"eval_dim_256_cosine_accuracy": 0.8809749171793658, | |
"eval_dim_256_dot_accuracy": 0.119380028395646, | |
"eval_dim_256_euclidean_accuracy": 0.8814481779460482, | |
"eval_dim_256_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_256_max_accuracy": 0.8814481779460482, | |
"eval_dim_384_cosine_accuracy": 0.880028395646001, | |
"eval_dim_384_dot_accuracy": 0.11997160435399905, | |
"eval_dim_384_euclidean_accuracy": 0.880028395646001, | |
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, | |
"eval_dim_384_max_accuracy": 0.880028395646001, | |
"eval_dim_64_cosine_accuracy": 0.8793185044959773, | |
"eval_dim_64_dot_accuracy": 0.12352106010411737, | |
"eval_dim_64_euclidean_accuracy": 0.8801467108376716, | |
"eval_dim_64_manhattan_accuracy": 0.8796734500709891, | |
"eval_dim_64_max_accuracy": 0.8801467108376716, | |
"eval_loss": 16.410737991333008, | |
"eval_runtime": 102.5333, | |
"eval_samples_per_second": 82.432, | |
"eval_sequential_score": 0.8793185044959773, | |
"eval_steps_per_second": 2.585, | |
"step": 2050 | |
}, | |
{ | |
"epoch": 14.135464871687, | |
"grad_norm": 4.87747859954834, | |
"learning_rate": 1.8011832481043577e-05, | |
"loss": 16.4526, | |
"step": 2100 | |
}, | |
{ | |
"epoch": 14.135464871687, | |
"eval_dim_128_cosine_accuracy": 0.8796734500709891, | |
"eval_dim_128_dot_accuracy": 0.12103644107903455, | |
"eval_dim_128_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_128_manhattan_accuracy": 0.8794368196876479, | |
"eval_dim_128_max_accuracy": 0.8796734500709891, | |
"eval_dim_256_cosine_accuracy": 0.8801467108376716, | |
"eval_dim_256_dot_accuracy": 0.12068149550402271, | |
"eval_dim_256_euclidean_accuracy": 0.8805016564126834, | |
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_256_max_accuracy": 0.8805016564126834, | |
"eval_dim_384_cosine_accuracy": 0.8808566019876952, | |
"eval_dim_384_dot_accuracy": 0.11914339801230478, | |
"eval_dim_384_euclidean_accuracy": 0.8808566019876952, | |
"eval_dim_384_manhattan_accuracy": 0.8810932323710364, | |
"eval_dim_384_max_accuracy": 0.8810932323710364, | |
"eval_dim_64_cosine_accuracy": 0.8778987221959299, | |
"eval_dim_64_dot_accuracy": 0.12470421202082348, | |
"eval_dim_64_euclidean_accuracy": 0.8781353525792712, | |
"eval_dim_64_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_64_max_accuracy": 0.879081874112636, | |
"eval_loss": 16.392879486083984, | |
"eval_runtime": 103.5589, | |
"eval_samples_per_second": 81.615, | |
"eval_sequential_score": 0.8778987221959299, | |
"eval_steps_per_second": 2.559, | |
"step": 2100 | |
}, | |
{ | |
"epoch": 14.47202355910812, | |
"grad_norm": 6.463150978088379, | |
"learning_rate": 1.78684769537159e-05, | |
"loss": 16.4343, | |
"step": 2150 | |
}, | |
{ | |
"epoch": 14.47202355910812, | |
"eval_dim_128_cosine_accuracy": 0.8788452437292948, | |
"eval_dim_128_dot_accuracy": 0.12221959299574066, | |
"eval_dim_128_euclidean_accuracy": 0.878490298154283, | |
"eval_dim_128_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_128_max_accuracy": 0.8797917652626597, | |
"eval_dim_256_cosine_accuracy": 0.879081874112636, | |
"eval_dim_256_dot_accuracy": 0.121509701845717, | |
"eval_dim_256_euclidean_accuracy": 0.8786086133459536, | |
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_256_max_accuracy": 0.8797917652626597, | |
"eval_dim_384_cosine_accuracy": 0.8796734500709891, | |
"eval_dim_384_dot_accuracy": 0.12032654992901089, | |
"eval_dim_384_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_384_manhattan_accuracy": 0.880619971604354, | |
"eval_dim_384_max_accuracy": 0.880619971604354, | |
"eval_dim_64_cosine_accuracy": 0.8774254614292475, | |
"eval_dim_64_dot_accuracy": 0.1250591575958353, | |
"eval_dim_64_euclidean_accuracy": 0.8774254614292475, | |
"eval_dim_64_manhattan_accuracy": 0.8780170373876006, | |
"eval_dim_64_max_accuracy": 0.8780170373876006, | |
"eval_loss": 16.40749740600586, | |
"eval_runtime": 102.9532, | |
"eval_samples_per_second": 82.096, | |
"eval_sequential_score": 0.8774254614292475, | |
"eval_steps_per_second": 2.574, | |
"step": 2150 | |
}, | |
{ | |
"epoch": 14.80858224652924, | |
"grad_norm": 4.839356422424316, | |
"learning_rate": 1.7720744574600865e-05, | |
"loss": 16.4244, | |
"step": 2200 | |
}, | |
{ | |
"epoch": 14.80858224652924, | |
"eval_dim_128_cosine_accuracy": 0.8803833412210128, | |
"eval_dim_128_dot_accuracy": 0.11973497397065783, | |
"eval_dim_128_euclidean_accuracy": 0.880619971604354, | |
"eval_dim_128_manhattan_accuracy": 0.8809749171793658, | |
"eval_dim_128_max_accuracy": 0.8809749171793658, | |
"eval_dim_256_cosine_accuracy": 0.8819214387127308, | |
"eval_dim_256_dot_accuracy": 0.119380028395646, | |
"eval_dim_256_euclidean_accuracy": 0.8815664931377188, | |
"eval_dim_256_manhattan_accuracy": 0.8814481779460482, | |
"eval_dim_256_max_accuracy": 0.8819214387127308, | |
"eval_dim_384_cosine_accuracy": 0.8820397539044014, | |
"eval_dim_384_dot_accuracy": 0.11796024609559867, | |
"eval_dim_384_euclidean_accuracy": 0.8820397539044014, | |
"eval_dim_384_manhattan_accuracy": 0.882158069096072, | |
"eval_dim_384_max_accuracy": 0.882158069096072, | |
"eval_dim_64_cosine_accuracy": 0.8808566019876952, | |
"eval_dim_64_dot_accuracy": 0.12221959299574066, | |
"eval_dim_64_euclidean_accuracy": 0.880619971604354, | |
"eval_dim_64_manhattan_accuracy": 0.8786086133459536, | |
"eval_dim_64_max_accuracy": 0.8808566019876952, | |
"eval_loss": 16.402673721313477, | |
"eval_runtime": 103.4179, | |
"eval_samples_per_second": 81.727, | |
"eval_sequential_score": 0.8808566019876952, | |
"eval_steps_per_second": 2.562, | |
"step": 2200 | |
}, | |
{ | |
"epoch": 15.145140933950358, | |
"grad_norm": 5.812349796295166, | |
"learning_rate": 1.756871752004992e-05, | |
"loss": 16.3947, | |
"step": 2250 | |
}, | |
{ | |
"epoch": 15.145140933950358, | |
"eval_dim_128_cosine_accuracy": 0.879081874112636, | |
"eval_dim_128_dot_accuracy": 0.12316611452910553, | |
"eval_dim_128_euclidean_accuracy": 0.8786086133459536, | |
"eval_dim_128_manhattan_accuracy": 0.8809749171793658, | |
"eval_dim_128_max_accuracy": 0.8809749171793658, | |
"eval_dim_256_cosine_accuracy": 0.8792001893043067, | |
"eval_dim_256_dot_accuracy": 0.12139138665404638, | |
"eval_dim_256_euclidean_accuracy": 0.8801467108376716, | |
"eval_dim_256_manhattan_accuracy": 0.8813298627543776, | |
"eval_dim_256_max_accuracy": 0.8813298627543776, | |
"eval_dim_384_cosine_accuracy": 0.8802650260293422, | |
"eval_dim_384_dot_accuracy": 0.11973497397065783, | |
"eval_dim_384_euclidean_accuracy": 0.8802650260293422, | |
"eval_dim_384_manhattan_accuracy": 0.8808566019876952, | |
"eval_dim_384_max_accuracy": 0.8808566019876952, | |
"eval_dim_64_cosine_accuracy": 0.8773071462375769, | |
"eval_dim_64_dot_accuracy": 0.12695220066256507, | |
"eval_dim_64_euclidean_accuracy": 0.8768338854708945, | |
"eval_dim_64_manhattan_accuracy": 0.8787269285376242, | |
"eval_dim_64_max_accuracy": 0.8787269285376242, | |
"eval_loss": 16.4101619720459, | |
"eval_runtime": 105.1832, | |
"eval_samples_per_second": 80.355, | |
"eval_sequential_score": 0.8773071462375769, | |
"eval_steps_per_second": 2.519, | |
"step": 2250 | |
}, | |
{ | |
"epoch": 15.481699621371476, | |
"grad_norm": 4.386394023895264, | |
"learning_rate": 1.7412480355334006e-05, | |
"loss": 16.3827, | |
"step": 2300 | |
}, | |
{ | |
"epoch": 15.481699621371476, | |
"eval_dim_128_cosine_accuracy": 0.8803833412210128, | |
"eval_dim_128_dot_accuracy": 0.12245622337908188, | |
"eval_dim_128_euclidean_accuracy": 0.880619971604354, | |
"eval_dim_128_manhattan_accuracy": 0.88180312352106, | |
"eval_dim_128_max_accuracy": 0.88180312352106, | |
"eval_dim_256_cosine_accuracy": 0.8813298627543776, | |
"eval_dim_256_dot_accuracy": 0.12079981069569333, | |
"eval_dim_256_euclidean_accuracy": 0.8809749171793658, | |
"eval_dim_256_manhattan_accuracy": 0.8819214387127308, | |
"eval_dim_256_max_accuracy": 0.8819214387127308, | |
"eval_dim_384_cosine_accuracy": 0.8813298627543776, | |
"eval_dim_384_dot_accuracy": 0.11867013724562234, | |
"eval_dim_384_euclidean_accuracy": 0.8813298627543776, | |
"eval_dim_384_manhattan_accuracy": 0.8809749171793658, | |
"eval_dim_384_max_accuracy": 0.8813298627543776, | |
"eval_dim_64_cosine_accuracy": 0.8781353525792712, | |
"eval_dim_64_dot_accuracy": 0.1260056791292002, | |
"eval_dim_64_euclidean_accuracy": 0.8789635589209654, | |
"eval_dim_64_manhattan_accuracy": 0.8814481779460482, | |
"eval_dim_64_max_accuracy": 0.8814481779460482, | |
"eval_loss": 16.404207229614258, | |
"eval_runtime": 101.3893, | |
"eval_samples_per_second": 83.362, | |
"eval_sequential_score": 0.8781353525792712, | |
"eval_steps_per_second": 2.614, | |
"step": 2300 | |
}, | |
{ | |
"epoch": 15.818258308792595, | |
"grad_norm": 4.8762359619140625, | |
"learning_rate": 1.7252119987603976e-05, | |
"loss": 16.3719, | |
"step": 2350 | |
}, | |
{ | |
"epoch": 15.818258308792595, | |
"eval_dim_128_cosine_accuracy": 0.8801467108376716, | |
"eval_dim_128_dot_accuracy": 0.12032654992901089, | |
"eval_dim_128_euclidean_accuracy": 0.8802650260293422, | |
"eval_dim_128_manhattan_accuracy": 0.8802650260293422, | |
"eval_dim_128_max_accuracy": 0.8802650260293422, | |
"eval_dim_256_cosine_accuracy": 0.88180312352106, | |
"eval_dim_256_dot_accuracy": 0.11878845243729295, | |
"eval_dim_256_euclidean_accuracy": 0.8820397539044014, | |
"eval_dim_256_manhattan_accuracy": 0.881211547562707, | |
"eval_dim_256_max_accuracy": 0.8820397539044014, | |
"eval_dim_384_cosine_accuracy": 0.8820397539044014, | |
"eval_dim_384_dot_accuracy": 0.11796024609559867, | |
"eval_dim_384_euclidean_accuracy": 0.8820397539044014, | |
"eval_dim_384_manhattan_accuracy": 0.8808566019876952, | |
"eval_dim_384_max_accuracy": 0.8820397539044014, | |
"eval_dim_64_cosine_accuracy": 0.879081874112636, | |
"eval_dim_64_dot_accuracy": 0.12458589682915286, | |
"eval_dim_64_euclidean_accuracy": 0.8805016564126834, | |
"eval_dim_64_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_64_max_accuracy": 0.8805016564126834, | |
"eval_loss": 16.40033721923828, | |
"eval_runtime": 104.1264, | |
"eval_samples_per_second": 81.171, | |
"eval_sequential_score": 0.879081874112636, | |
"eval_steps_per_second": 2.545, | |
"step": 2350 | |
}, | |
{ | |
"epoch": 16.154816996213714, | |
"grad_norm": 5.414395809173584, | |
"learning_rate": 1.7087725617548385e-05, | |
"loss": 16.3403, | |
"step": 2400 | |
}, | |
{ | |
"epoch": 16.154816996213714, | |
"eval_dim_128_cosine_accuracy": 0.8781353525792712, | |
"eval_dim_128_dot_accuracy": 0.12328442972077615, | |
"eval_dim_128_euclidean_accuracy": 0.8778987221959299, | |
"eval_dim_128_manhattan_accuracy": 0.8787269285376242, | |
"eval_dim_128_max_accuracy": 0.8787269285376242, | |
"eval_dim_256_cosine_accuracy": 0.879081874112636, | |
"eval_dim_256_dot_accuracy": 0.12210127780407004, | |
"eval_dim_256_euclidean_accuracy": 0.8788452437292948, | |
"eval_dim_256_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_256_max_accuracy": 0.879081874112636, | |
"eval_dim_384_cosine_accuracy": 0.8799100804543304, | |
"eval_dim_384_dot_accuracy": 0.12008991954566967, | |
"eval_dim_384_euclidean_accuracy": 0.8799100804543304, | |
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, | |
"eval_dim_384_max_accuracy": 0.8799100804543304, | |
"eval_dim_64_cosine_accuracy": 0.8767155702792239, | |
"eval_dim_64_dot_accuracy": 0.12766209181258872, | |
"eval_dim_64_euclidean_accuracy": 0.8765972550875533, | |
"eval_dim_64_manhattan_accuracy": 0.8769522006625651, | |
"eval_dim_64_max_accuracy": 0.8769522006625651, | |
"eval_loss": 16.413236618041992, | |
"eval_runtime": 105.1626, | |
"eval_samples_per_second": 80.371, | |
"eval_sequential_score": 0.8767155702792239, | |
"eval_steps_per_second": 2.52, | |
"step": 2400 | |
}, | |
{ | |
"epoch": 16.491375683634836, | |
"grad_norm": 4.138753414154053, | |
"learning_rate": 1.6919388689775463e-05, | |
"loss": 16.3357, | |
"step": 2450 | |
}, | |
{ | |
"epoch": 16.491375683634836, | |
"eval_dim_128_cosine_accuracy": 0.8803833412210128, | |
"eval_dim_128_dot_accuracy": 0.1216280170373876, | |
"eval_dim_128_euclidean_accuracy": 0.879081874112636, | |
"eval_dim_128_manhattan_accuracy": 0.8802650260293422, | |
"eval_dim_128_max_accuracy": 0.8803833412210128, | |
"eval_dim_256_cosine_accuracy": 0.8808566019876952, | |
"eval_dim_256_dot_accuracy": 0.121509701845717, | |
"eval_dim_256_euclidean_accuracy": 0.8802650260293422, | |
"eval_dim_256_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_256_max_accuracy": 0.8808566019876952, | |
"eval_dim_384_cosine_accuracy": 0.8807382867960246, | |
"eval_dim_384_dot_accuracy": 0.11926171320397538, | |
"eval_dim_384_euclidean_accuracy": 0.8807382867960246, | |
"eval_dim_384_manhattan_accuracy": 0.8801467108376716, | |
"eval_dim_384_max_accuracy": 0.8807382867960246, | |
"eval_dim_64_cosine_accuracy": 0.8792001893043067, | |
"eval_dim_64_dot_accuracy": 0.12647893989588263, | |
"eval_dim_64_euclidean_accuracy": 0.8786086133459536, | |
"eval_dim_64_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_64_max_accuracy": 0.8807382867960246, | |
"eval_loss": 16.414878845214844, | |
"eval_runtime": 100.6398, | |
"eval_samples_per_second": 83.983, | |
"eval_sequential_score": 0.8792001893043067, | |
"eval_steps_per_second": 2.633, | |
"step": 2450 | |
}, | |
{ | |
"epoch": 16.827934371055953, | |
"grad_norm": 4.080146312713623, | |
"learning_rate": 1.6747202841946928e-05, | |
"loss": 16.3203, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 16.827934371055953, | |
"eval_dim_128_cosine_accuracy": 0.8803833412210128, | |
"eval_dim_128_dot_accuracy": 0.12186464742072882, | |
"eval_dim_128_euclidean_accuracy": 0.8814481779460482, | |
"eval_dim_128_manhattan_accuracy": 0.880619971604354, | |
"eval_dim_128_max_accuracy": 0.8814481779460482, | |
"eval_dim_256_cosine_accuracy": 0.8814481779460482, | |
"eval_dim_256_dot_accuracy": 0.12044486512068149, | |
"eval_dim_256_euclidean_accuracy": 0.8820397539044014, | |
"eval_dim_256_manhattan_accuracy": 0.8825130146710838, | |
"eval_dim_256_max_accuracy": 0.8825130146710838, | |
"eval_dim_384_cosine_accuracy": 0.8815664931377188, | |
"eval_dim_384_dot_accuracy": 0.11843350686228112, | |
"eval_dim_384_euclidean_accuracy": 0.8815664931377188, | |
"eval_dim_384_manhattan_accuracy": 0.8815664931377188, | |
"eval_dim_384_max_accuracy": 0.8815664931377188, | |
"eval_dim_64_cosine_accuracy": 0.879081874112636, | |
"eval_dim_64_dot_accuracy": 0.12565073355418835, | |
"eval_dim_64_euclidean_accuracy": 0.880028395646001, | |
"eval_dim_64_manhattan_accuracy": 0.8810932323710364, | |
"eval_dim_64_max_accuracy": 0.8810932323710364, | |
"eval_loss": 16.408126831054688, | |
"eval_runtime": 103.4973, | |
"eval_samples_per_second": 81.664, | |
"eval_sequential_score": 0.879081874112636, | |
"eval_steps_per_second": 2.56, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 17.16449305847707, | |
"grad_norm": 5.26322078704834, | |
"learning_rate": 1.6571263852691887e-05, | |
"loss": 16.2986, | |
"step": 2550 | |
}, | |
{ | |
"epoch": 17.16449305847707, | |
"eval_dim_128_cosine_accuracy": 0.8797917652626597, | |
"eval_dim_128_dot_accuracy": 0.12304779933743493, | |
"eval_dim_128_euclidean_accuracy": 0.8801467108376716, | |
"eval_dim_128_manhattan_accuracy": 0.8778987221959299, | |
"eval_dim_128_max_accuracy": 0.8801467108376716, | |
"eval_dim_256_cosine_accuracy": 0.880028395646001, | |
"eval_dim_256_dot_accuracy": 0.12068149550402271, | |
"eval_dim_256_euclidean_accuracy": 0.880619971604354, | |
"eval_dim_256_manhattan_accuracy": 0.8805016564126834, | |
"eval_dim_256_max_accuracy": 0.880619971604354, | |
"eval_dim_384_cosine_accuracy": 0.8820397539044014, | |
"eval_dim_384_dot_accuracy": 0.11796024609559867, | |
"eval_dim_384_euclidean_accuracy": 0.8820397539044014, | |
"eval_dim_384_manhattan_accuracy": 0.8808566019876952, | |
"eval_dim_384_max_accuracy": 0.8820397539044014, | |
"eval_dim_64_cosine_accuracy": 0.879081874112636, | |
"eval_dim_64_dot_accuracy": 0.12707051585423568, | |
"eval_dim_64_euclidean_accuracy": 0.8781353525792712, | |
"eval_dim_64_manhattan_accuracy": 0.8778987221959299, | |
"eval_dim_64_max_accuracy": 0.879081874112636, | |
"eval_loss": 16.413921356201172, | |
"eval_runtime": 103.9357, | |
"eval_samples_per_second": 81.32, | |
"eval_sequential_score": 0.879081874112636, | |
"eval_steps_per_second": 2.55, | |
"step": 2550 | |
}, | |
{ | |
"epoch": 17.50105174589819, | |
"grad_norm": 9.353097915649414, | |
"learning_rate": 1.639166958832985e-05, | |
"loss": 16.2923, | |
"step": 2600 | |
}, | |
{ | |
"epoch": 17.50105174589819, | |
"eval_dim_128_cosine_accuracy": 0.8786086133459536, | |
"eval_dim_128_dot_accuracy": 0.12352106010411737, | |
"eval_dim_128_euclidean_accuracy": 0.8783719829626124, | |
"eval_dim_128_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_128_max_accuracy": 0.8807382867960246, | |
"eval_dim_256_cosine_accuracy": 0.8792001893043067, | |
"eval_dim_256_dot_accuracy": 0.12103644107903455, | |
"eval_dim_256_euclidean_accuracy": 0.8796734500709891, | |
"eval_dim_256_manhattan_accuracy": 0.8810932323710364, | |
"eval_dim_256_max_accuracy": 0.8810932323710364, | |
"eval_dim_384_cosine_accuracy": 0.8799100804543304, | |
"eval_dim_384_dot_accuracy": 0.12008991954566967, | |
"eval_dim_384_euclidean_accuracy": 0.8799100804543304, | |
"eval_dim_384_manhattan_accuracy": 0.880028395646001, | |
"eval_dim_384_max_accuracy": 0.880028395646001, | |
"eval_dim_64_cosine_accuracy": 0.8768338854708945, | |
"eval_dim_64_dot_accuracy": 0.12754377662091812, | |
"eval_dim_64_euclidean_accuracy": 0.8762423095125415, | |
"eval_dim_64_manhattan_accuracy": 0.8789635589209654, | |
"eval_dim_64_max_accuracy": 0.8789635589209654, | |
"eval_loss": 16.406217575073242, | |
"eval_runtime": 101.8719, | |
"eval_samples_per_second": 82.967, | |
"eval_sequential_score": 0.8768338854708945, | |
"eval_steps_per_second": 2.601, | |
"step": 2600 | |
}, | |
{ | |
"epoch": 17.83761043331931, | |
"grad_norm": 5.8258891105651855, | |
"learning_rate": 1.6208519948432438e-05, | |
"loss": 16.2649, | |
"step": 2650 | |
}, | |
{ | |
"epoch": 17.83761043331931, | |
"eval_dim_128_cosine_accuracy": 0.880028395646001, | |
"eval_dim_128_dot_accuracy": 0.12186464742072882, | |
"eval_dim_128_euclidean_accuracy": 0.8803833412210128, | |
"eval_dim_128_manhattan_accuracy": 0.8799100804543304, | |
"eval_dim_128_max_accuracy": 0.8803833412210128, | |
"eval_dim_256_cosine_accuracy": 0.8807382867960246, | |
"eval_dim_256_dot_accuracy": 0.12210127780407004, | |
"eval_dim_256_euclidean_accuracy": 0.8814481779460482, | |
"eval_dim_256_manhattan_accuracy": 0.8810932323710364, | |
"eval_dim_256_max_accuracy": 0.8814481779460482, | |
"eval_dim_384_cosine_accuracy": 0.8814481779460482, | |
"eval_dim_384_dot_accuracy": 0.11855182205395173, | |
"eval_dim_384_euclidean_accuracy": 0.8814481779460482, | |
"eval_dim_384_manhattan_accuracy": 0.8814481779460482, | |
"eval_dim_384_max_accuracy": 0.8814481779460482, | |
"eval_dim_64_cosine_accuracy": 0.8787269285376242, | |
"eval_dim_64_dot_accuracy": 0.1283719829626124, | |
"eval_dim_64_euclidean_accuracy": 0.8788452437292948, | |
"eval_dim_64_manhattan_accuracy": 0.8799100804543304, | |
"eval_dim_64_max_accuracy": 0.8799100804543304, | |
"eval_loss": 16.410572052001953, | |
"eval_runtime": 101.9269, | |
"eval_samples_per_second": 82.922, | |
"eval_sequential_score": 0.8787269285376242, | |
"eval_steps_per_second": 2.6, | |
"step": 2650 | |
}, | |
{ | |
"epoch": 18.17416912074043, | |
"grad_norm": 4.463468074798584, | |
"learning_rate": 1.6021916810254096e-05, | |
"loss": 16.2505, | |
"step": 2700 | |
}, | |
{ | |
"epoch": 18.17416912074043, | |
"eval_dim_128_cosine_accuracy": 0.8786086133459536, | |
"eval_dim_128_dot_accuracy": 0.12411263606247042, | |
"eval_dim_128_euclidean_accuracy": 0.8780170373876006, | |
"eval_dim_128_manhattan_accuracy": 0.8792001893043067, | |
"eval_dim_128_max_accuracy": 0.8792001893043067, | |
"eval_dim_256_cosine_accuracy": 0.8793185044959773, | |
"eval_dim_256_dot_accuracy": 0.12210127780407004, | |
"eval_dim_256_euclidean_accuracy": 0.8793185044959773, | |
"eval_dim_256_manhattan_accuracy": 0.8805016564126834, | |
"eval_dim_256_max_accuracy": 0.8805016564126834, | |
"eval_dim_384_cosine_accuracy": 0.8802650260293422, | |
"eval_dim_384_dot_accuracy": 0.11973497397065783, | |
"eval_dim_384_euclidean_accuracy": 0.8802650260293422, | |
"eval_dim_384_manhattan_accuracy": 0.8813298627543776, | |
"eval_dim_384_max_accuracy": 0.8813298627543776, | |
"eval_dim_64_cosine_accuracy": 0.8770705158542357, | |
"eval_dim_64_dot_accuracy": 0.13014671083767157, | |
"eval_dim_64_euclidean_accuracy": 0.8761239943208708, | |
"eval_dim_64_manhattan_accuracy": 0.8787269285376242, | |
"eval_dim_64_max_accuracy": 0.8787269285376242, | |
"eval_loss": 16.418752670288086, | |
"eval_runtime": 106.398, | |
"eval_samples_per_second": 79.438, | |
"eval_sequential_score": 0.8770705158542357, | |
"eval_steps_per_second": 2.491, | |
"step": 2700 | |
}, | |
{ | |
"epoch": 18.510727808161548, | |
"grad_norm": 5.066239833831787, | |
"learning_rate": 1.5831963972062734e-05, | |
"loss": 16.226, | |
"step": 2750 | |
}, | |
{ | |
"epoch": 18.510727808161548, | |
"eval_dim_128_cosine_accuracy": 0.8770705158542357, | |
"eval_dim_128_dot_accuracy": 0.12446758163748226, | |
"eval_dim_128_euclidean_accuracy": 0.8771888310459063, | |
"eval_dim_128_manhattan_accuracy": 0.8778987221959299, | |
"eval_dim_128_max_accuracy": 0.8778987221959299, | |
"eval_dim_256_cosine_accuracy": 0.8781353525792712, | |
"eval_dim_256_dot_accuracy": 0.12304779933743493, | |
"eval_dim_256_euclidean_accuracy": 0.8788452437292948, | |
"eval_dim_256_manhattan_accuracy": 0.8799100804543304, | |
"eval_dim_256_max_accuracy": 0.8799100804543304, | |
"eval_dim_384_cosine_accuracy": 0.8780170373876006, | |
"eval_dim_384_dot_accuracy": 0.12198296261239944, | |
"eval_dim_384_euclidean_accuracy": 0.8780170373876006, | |
"eval_dim_384_manhattan_accuracy": 0.8770705158542357, | |
"eval_dim_384_max_accuracy": 0.8780170373876006, | |
"eval_dim_64_cosine_accuracy": 0.8765972550875533, | |
"eval_dim_64_dot_accuracy": 0.12884524372929484, | |
"eval_dim_64_euclidean_accuracy": 0.8765972550875533, | |
"eval_dim_64_manhattan_accuracy": 0.8778987221959299, | |
"eval_dim_64_max_accuracy": 0.8778987221959299, | |
"eval_loss": 16.4149112701416, | |
"eval_runtime": 101.2915, | |
"eval_samples_per_second": 83.442, | |
"eval_sequential_score": 0.8765972550875533, | |
"eval_steps_per_second": 2.616, | |
"step": 2750 | |
}, | |
{ | |
"epoch": 18.84728649558267, | |
"grad_norm": 4.982476234436035, | |
"learning_rate": 1.5638767095401778e-05, | |
"loss": 16.2106, | |
"step": 2800 | |
}, | |
{ | |
"epoch": 18.84728649558267, | |
"eval_dim_128_cosine_accuracy": 0.8780170373876006, | |
"eval_dim_128_dot_accuracy": 0.12529578797917654, | |
"eval_dim_128_euclidean_accuracy": 0.878490298154283, | |
"eval_dim_128_manhattan_accuracy": 0.8794368196876479, | |
"eval_dim_128_max_accuracy": 0.8794368196876479, | |
"eval_dim_256_cosine_accuracy": 0.8799100804543304, | |
"eval_dim_256_dot_accuracy": 0.1226928537624231, | |
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, | |
"eval_dim_256_manhattan_accuracy": 0.8801467108376716, | |
"eval_dim_256_max_accuracy": 0.8801467108376716, | |
"eval_dim_384_cosine_accuracy": 0.879081874112636, | |
"eval_dim_384_dot_accuracy": 0.12091812588736393, | |
"eval_dim_384_euclidean_accuracy": 0.879081874112636, | |
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, | |
"eval_dim_384_max_accuracy": 0.8794368196876479, | |
"eval_dim_64_cosine_accuracy": 0.8767155702792239, | |
"eval_dim_64_dot_accuracy": 0.13002839564600094, | |
"eval_dim_64_euclidean_accuracy": 0.8768338854708945, | |
"eval_dim_64_manhattan_accuracy": 0.8778987221959299, | |
"eval_dim_64_max_accuracy": 0.8778987221959299, | |
"eval_loss": 16.423009872436523, | |
"eval_runtime": 103.6087, | |
"eval_samples_per_second": 81.576, | |
"eval_sequential_score": 0.8767155702792239, | |
"eval_steps_per_second": 2.558, | |
"step": 2800 | |
}, | |
{ | |
"epoch": 19.183845183003786, | |
"grad_norm": 6.176373481750488, | |
"learning_rate": 1.5442433646315792e-05, | |
"loss": 16.2052, | |
"step": 2850 | |
}, | |
{ | |
"epoch": 19.183845183003786, | |
"eval_dim_128_cosine_accuracy": 0.8769522006625651, | |
"eval_dim_128_dot_accuracy": 0.12576904874585898, | |
"eval_dim_128_euclidean_accuracy": 0.8758873639375295, | |
"eval_dim_128_manhattan_accuracy": 0.8793185044959773, | |
"eval_dim_128_max_accuracy": 0.8793185044959773, | |
"eval_dim_256_cosine_accuracy": 0.8776620918125887, | |
"eval_dim_256_dot_accuracy": 0.12328442972077615, | |
"eval_dim_256_euclidean_accuracy": 0.8778987221959299, | |
"eval_dim_256_manhattan_accuracy": 0.8796734500709891, | |
"eval_dim_256_max_accuracy": 0.8796734500709891, | |
"eval_dim_384_cosine_accuracy": 0.878490298154283, | |
"eval_dim_384_dot_accuracy": 0.121509701845717, | |
"eval_dim_384_euclidean_accuracy": 0.878490298154283, | |
"eval_dim_384_manhattan_accuracy": 0.8814481779460482, | |
"eval_dim_384_max_accuracy": 0.8814481779460482, | |
"eval_dim_64_cosine_accuracy": 0.8744675816374823, | |
"eval_dim_64_dot_accuracy": 0.13369616658778988, | |
"eval_dim_64_euclidean_accuracy": 0.8742309512541411, | |
"eval_dim_64_manhattan_accuracy": 0.8781353525792712, | |
"eval_dim_64_max_accuracy": 0.8781353525792712, | |
"eval_loss": 16.435117721557617, | |
"eval_runtime": 104.4101, | |
"eval_samples_per_second": 80.95, | |
"eval_sequential_score": 0.8744675816374823, | |
"eval_steps_per_second": 2.538, | |
"step": 2850 | |
}, | |
{ | |
"epoch": 19.520403870424904, | |
"grad_norm": 7.323819160461426, | |
"learning_rate": 1.5243072835572319e-05, | |
"loss": 16.186, | |
"step": 2900 | |
}, | |
{ | |
"epoch": 19.520403870424904, | |
"eval_dim_128_cosine_accuracy": 0.8776620918125887, | |
"eval_dim_128_dot_accuracy": 0.12363937529578797, | |
"eval_dim_128_euclidean_accuracy": 0.8776620918125887, | |
"eval_dim_128_manhattan_accuracy": 0.876360624704212, | |
"eval_dim_128_max_accuracy": 0.8776620918125887, | |
"eval_dim_256_cosine_accuracy": 0.8793185044959773, | |
"eval_dim_256_dot_accuracy": 0.12198296261239944, | |
"eval_dim_256_euclidean_accuracy": 0.8789635589209654, | |
"eval_dim_256_manhattan_accuracy": 0.8777804070042593, | |
"eval_dim_256_max_accuracy": 0.8793185044959773, | |
"eval_dim_384_cosine_accuracy": 0.8792001893043067, | |
"eval_dim_384_dot_accuracy": 0.12079981069569333, | |
"eval_dim_384_euclidean_accuracy": 0.8792001893043067, | |
"eval_dim_384_manhattan_accuracy": 0.8789635589209654, | |
"eval_dim_384_max_accuracy": 0.8792001893043067, | |
"eval_dim_64_cosine_accuracy": 0.8762423095125415, | |
"eval_dim_64_dot_accuracy": 0.13097491717936582, | |
"eval_dim_64_euclidean_accuracy": 0.8748225272124941, | |
"eval_dim_64_manhattan_accuracy": 0.8782536677709418, | |
"eval_dim_64_max_accuracy": 0.8782536677709418, | |
"eval_loss": 16.433080673217773, | |
"eval_runtime": 101.0285, | |
"eval_samples_per_second": 83.66, | |
"eval_sequential_score": 0.8762423095125415, | |
"eval_steps_per_second": 2.623, | |
"step": 2900 | |
}, | |
{ | |
"epoch": 19.856962557846025, | |
"grad_norm": 6.637113571166992, | |
"learning_rate": 1.5040795557913246e-05, | |
"loss": 16.1496, | |
"step": 2950 | |
}, | |
{ | |
"epoch": 19.856962557846025, | |
"eval_dim_128_cosine_accuracy": 0.8774254614292475, | |
"eval_dim_128_dot_accuracy": 0.12529578797917654, | |
"eval_dim_128_euclidean_accuracy": 0.8770705158542357, | |
"eval_dim_128_manhattan_accuracy": 0.8782536677709418, | |
"eval_dim_128_max_accuracy": 0.8782536677709418, | |
"eval_dim_256_cosine_accuracy": 0.8781353525792712, | |
"eval_dim_256_dot_accuracy": 0.12375769048745859, | |
"eval_dim_256_euclidean_accuracy": 0.8783719829626124, | |
"eval_dim_256_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_256_max_accuracy": 0.879081874112636, | |
"eval_dim_384_cosine_accuracy": 0.8780170373876006, | |
"eval_dim_384_dot_accuracy": 0.12198296261239944, | |
"eval_dim_384_euclidean_accuracy": 0.8780170373876006, | |
"eval_dim_384_manhattan_accuracy": 0.8786086133459536, | |
"eval_dim_384_max_accuracy": 0.8786086133459536, | |
"eval_dim_64_cosine_accuracy": 0.8770705158542357, | |
"eval_dim_64_dot_accuracy": 0.13357785139611927, | |
"eval_dim_64_euclidean_accuracy": 0.8756507335541883, | |
"eval_dim_64_manhattan_accuracy": 0.8775437766209181, | |
"eval_dim_64_max_accuracy": 0.8775437766209181, | |
"eval_loss": 16.437721252441406, | |
"eval_runtime": 103.9645, | |
"eval_samples_per_second": 81.297, | |
"eval_sequential_score": 0.8770705158542357, | |
"eval_steps_per_second": 2.549, | |
"step": 2950 | |
}, | |
{ | |
"epoch": 20.193521245267142, | |
"grad_norm": 4.9336957931518555, | |
"learning_rate": 1.4835714330369445e-05, | |
"loss": 16.151, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 20.193521245267142, | |
"eval_dim_128_cosine_accuracy": 0.8765972550875533, | |
"eval_dim_128_dot_accuracy": 0.1261239943208708, | |
"eval_dim_128_euclidean_accuracy": 0.8761239943208708, | |
"eval_dim_128_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_128_max_accuracy": 0.8797917652626597, | |
"eval_dim_256_cosine_accuracy": 0.8780170373876006, | |
"eval_dim_256_dot_accuracy": 0.12245622337908188, | |
"eval_dim_256_euclidean_accuracy": 0.8771888310459063, | |
"eval_dim_256_manhattan_accuracy": 0.8801467108376716, | |
"eval_dim_256_max_accuracy": 0.8801467108376716, | |
"eval_dim_384_cosine_accuracy": 0.8780170373876006, | |
"eval_dim_384_dot_accuracy": 0.12198296261239944, | |
"eval_dim_384_euclidean_accuracy": 0.8780170373876006, | |
"eval_dim_384_manhattan_accuracy": 0.8819214387127308, | |
"eval_dim_384_max_accuracy": 0.8819214387127308, | |
"eval_dim_64_cosine_accuracy": 0.8750591575958353, | |
"eval_dim_64_dot_accuracy": 0.1361807856128727, | |
"eval_dim_64_euclidean_accuracy": 0.8730477993374349, | |
"eval_dim_64_manhattan_accuracy": 0.878490298154283, | |
"eval_dim_64_max_accuracy": 0.878490298154283, | |
"eval_loss": 16.44074821472168, | |
"eval_runtime": 101.9564, | |
"eval_samples_per_second": 82.898, | |
"eval_sequential_score": 0.8750591575958353, | |
"eval_steps_per_second": 2.599, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 20.530079932688263, | |
"grad_norm": 5.225156784057617, | |
"learning_rate": 1.4627943229672992e-05, | |
"loss": 16.1081, | |
"step": 3050 | |
}, | |
{ | |
"epoch": 20.530079932688263, | |
"eval_dim_128_cosine_accuracy": 0.8758873639375295, | |
"eval_dim_128_dot_accuracy": 0.1261239943208708, | |
"eval_dim_128_euclidean_accuracy": 0.8758873639375295, | |
"eval_dim_128_manhattan_accuracy": 0.8781353525792712, | |
"eval_dim_128_max_accuracy": 0.8781353525792712, | |
"eval_dim_256_cosine_accuracy": 0.8775437766209181, | |
"eval_dim_256_dot_accuracy": 0.12245622337908188, | |
"eval_dim_256_euclidean_accuracy": 0.8778987221959299, | |
"eval_dim_256_manhattan_accuracy": 0.8776620918125887, | |
"eval_dim_256_max_accuracy": 0.8778987221959299, | |
"eval_dim_384_cosine_accuracy": 0.8774254614292475, | |
"eval_dim_384_dot_accuracy": 0.12257453857075248, | |
"eval_dim_384_euclidean_accuracy": 0.8774254614292475, | |
"eval_dim_384_manhattan_accuracy": 0.8788452437292948, | |
"eval_dim_384_max_accuracy": 0.8788452437292948, | |
"eval_dim_64_cosine_accuracy": 0.8749408424041647, | |
"eval_dim_64_dot_accuracy": 0.13712730714623758, | |
"eval_dim_64_euclidean_accuracy": 0.8743492664458117, | |
"eval_dim_64_manhattan_accuracy": 0.8765972550875533, | |
"eval_dim_64_max_accuracy": 0.8765972550875533, | |
"eval_loss": 16.442630767822266, | |
"eval_runtime": 104.3455, | |
"eval_samples_per_second": 81.0, | |
"eval_sequential_score": 0.8749408424041647, | |
"eval_steps_per_second": 2.54, | |
"step": 3050 | |
}, | |
{ | |
"epoch": 20.86663862010938, | |
"grad_norm": 4.5568132400512695, | |
"learning_rate": 1.4417597828801833e-05, | |
"loss": 16.0864, | |
"step": 3100 | |
}, | |
{ | |
"epoch": 20.86663862010938, | |
"eval_dim_128_cosine_accuracy": 0.8774254614292475, | |
"eval_dim_128_dot_accuracy": 0.12659725508755323, | |
"eval_dim_128_euclidean_accuracy": 0.8765972550875533, | |
"eval_dim_128_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_128_max_accuracy": 0.879081874112636, | |
"eval_dim_256_cosine_accuracy": 0.8781353525792712, | |
"eval_dim_256_dot_accuracy": 0.12292948414576432, | |
"eval_dim_256_euclidean_accuracy": 0.8780170373876006, | |
"eval_dim_256_manhattan_accuracy": 0.880619971604354, | |
"eval_dim_256_max_accuracy": 0.880619971604354, | |
"eval_dim_384_cosine_accuracy": 0.8787269285376242, | |
"eval_dim_384_dot_accuracy": 0.12127307146237577, | |
"eval_dim_384_euclidean_accuracy": 0.8787269285376242, | |
"eval_dim_384_manhattan_accuracy": 0.8793185044959773, | |
"eval_dim_384_max_accuracy": 0.8793185044959773, | |
"eval_dim_64_cosine_accuracy": 0.8745858968291529, | |
"eval_dim_64_dot_accuracy": 0.13724562233790819, | |
"eval_dim_64_euclidean_accuracy": 0.8744675816374823, | |
"eval_dim_64_manhattan_accuracy": 0.8780170373876006, | |
"eval_dim_64_max_accuracy": 0.8780170373876006, | |
"eval_loss": 16.441152572631836, | |
"eval_runtime": 103.8678, | |
"eval_samples_per_second": 81.373, | |
"eval_sequential_score": 0.8745858968291529, | |
"eval_steps_per_second": 2.551, | |
"step": 3100 | |
}, | |
{ | |
"epoch": 21.203197307530502, | |
"grad_norm": 6.664557933807373, | |
"learning_rate": 1.4204795132692146e-05, | |
"loss": 16.0934, | |
"step": 3150 | |
}, | |
{ | |
"epoch": 21.203197307530502, | |
"eval_dim_128_cosine_accuracy": 0.8768338854708945, | |
"eval_dim_128_dot_accuracy": 0.12789872219592996, | |
"eval_dim_128_euclidean_accuracy": 0.8758873639375295, | |
"eval_dim_128_manhattan_accuracy": 0.8803833412210128, | |
"eval_dim_128_max_accuracy": 0.8803833412210128, | |
"eval_dim_256_cosine_accuracy": 0.8782536677709418, | |
"eval_dim_256_dot_accuracy": 0.12411263606247042, | |
"eval_dim_256_euclidean_accuracy": 0.8777804070042593, | |
"eval_dim_256_manhattan_accuracy": 0.88180312352106, | |
"eval_dim_256_max_accuracy": 0.88180312352106, | |
"eval_dim_384_cosine_accuracy": 0.8794368196876479, | |
"eval_dim_384_dot_accuracy": 0.12056318031235211, | |
"eval_dim_384_euclidean_accuracy": 0.8794368196876479, | |
"eval_dim_384_manhattan_accuracy": 0.881211547562707, | |
"eval_dim_384_max_accuracy": 0.881211547562707, | |
"eval_dim_64_cosine_accuracy": 0.8745858968291529, | |
"eval_dim_64_dot_accuracy": 0.14008518693800284, | |
"eval_dim_64_euclidean_accuracy": 0.8729294841457643, | |
"eval_dim_64_manhattan_accuracy": 0.8795551348793185, | |
"eval_dim_64_max_accuracy": 0.8795551348793185, | |
"eval_loss": 16.4547176361084, | |
"eval_runtime": 105.011, | |
"eval_samples_per_second": 80.487, | |
"eval_sequential_score": 0.8745858968291529, | |
"eval_steps_per_second": 2.524, | |
"step": 3150 | |
}, | |
{ | |
"epoch": 21.53975599495162, | |
"grad_norm": 6.669680118560791, | |
"learning_rate": 1.3989653513154165e-05, | |
"loss": 16.0382, | |
"step": 3200 | |
}, | |
{ | |
"epoch": 21.53975599495162, | |
"eval_dim_128_cosine_accuracy": 0.8742309512541411, | |
"eval_dim_128_dot_accuracy": 0.1283719829626124, | |
"eval_dim_128_euclidean_accuracy": 0.8738760056791292, | |
"eval_dim_128_manhattan_accuracy": 0.8748225272124941, | |
"eval_dim_128_max_accuracy": 0.8748225272124941, | |
"eval_dim_256_cosine_accuracy": 0.8751774727875059, | |
"eval_dim_256_dot_accuracy": 0.12446758163748226, | |
"eval_dim_256_euclidean_accuracy": 0.8754141031708471, | |
"eval_dim_256_manhattan_accuracy": 0.8765972550875533, | |
"eval_dim_256_max_accuracy": 0.8765972550875533, | |
"eval_dim_384_cosine_accuracy": 0.8765972550875533, | |
"eval_dim_384_dot_accuracy": 0.12340274491244675, | |
"eval_dim_384_euclidean_accuracy": 0.8765972550875533, | |
"eval_dim_384_manhattan_accuracy": 0.8761239943208708, | |
"eval_dim_384_max_accuracy": 0.8765972550875533, | |
"eval_dim_64_cosine_accuracy": 0.8723379081874113, | |
"eval_dim_64_dot_accuracy": 0.14020350212967345, | |
"eval_dim_64_euclidean_accuracy": 0.8703265499290109, | |
"eval_dim_64_manhattan_accuracy": 0.8754141031708471, | |
"eval_dim_64_max_accuracy": 0.8754141031708471, | |
"eval_loss": 16.458948135375977, | |
"eval_runtime": 101.007, | |
"eval_samples_per_second": 83.677, | |
"eval_sequential_score": 0.8723379081874113, | |
"eval_steps_per_second": 2.624, | |
"step": 3200 | |
}, | |
{ | |
"epoch": 21.87631468237274, | |
"grad_norm": 5.666304588317871, | |
"learning_rate": 1.37722926430277e-05, | |
"loss": 16.0279, | |
"step": 3250 | |
}, | |
{ | |
"epoch": 21.87631468237274, | |
"eval_dim_128_cosine_accuracy": 0.8751774727875059, | |
"eval_dim_128_dot_accuracy": 0.12979176526265973, | |
"eval_dim_128_euclidean_accuracy": 0.8743492664458117, | |
"eval_dim_128_manhattan_accuracy": 0.8771888310459063, | |
"eval_dim_128_max_accuracy": 0.8771888310459063, | |
"eval_dim_256_cosine_accuracy": 0.8765972550875533, | |
"eval_dim_256_dot_accuracy": 0.1260056791292002, | |
"eval_dim_256_euclidean_accuracy": 0.8761239943208708, | |
"eval_dim_256_manhattan_accuracy": 0.8796734500709891, | |
"eval_dim_256_max_accuracy": 0.8796734500709891, | |
"eval_dim_384_cosine_accuracy": 0.8773071462375769, | |
"eval_dim_384_dot_accuracy": 0.1226928537624231, | |
"eval_dim_384_euclidean_accuracy": 0.8773071462375769, | |
"eval_dim_384_manhattan_accuracy": 0.8777804070042593, | |
"eval_dim_384_max_accuracy": 0.8777804070042593, | |
"eval_dim_64_cosine_accuracy": 0.8728111689540937, | |
"eval_dim_64_dot_accuracy": 0.14221486038807382, | |
"eval_dim_64_euclidean_accuracy": 0.8732844297207761, | |
"eval_dim_64_manhattan_accuracy": 0.8776620918125887, | |
"eval_dim_64_max_accuracy": 0.8776620918125887, | |
"eval_loss": 16.46676254272461, | |
"eval_runtime": 102.9103, | |
"eval_samples_per_second": 82.13, | |
"eval_sequential_score": 0.8728111689540937, | |
"eval_steps_per_second": 2.575, | |
"step": 3250 | |
}, | |
{ | |
"epoch": 22.212873369793858, | |
"grad_norm": 6.600480556488037, | |
"learning_rate": 1.3552833429613939e-05, | |
"loss": 16.0327, | |
"step": 3300 | |
}, | |
{ | |
"epoch": 22.212873369793858, | |
"eval_dim_128_cosine_accuracy": 0.8742309512541411, | |
"eval_dim_128_dot_accuracy": 0.13002839564600094, | |
"eval_dim_128_euclidean_accuracy": 0.8742309512541411, | |
"eval_dim_128_manhattan_accuracy": 0.880028395646001, | |
"eval_dim_128_max_accuracy": 0.880028395646001, | |
"eval_dim_256_cosine_accuracy": 0.8768338854708945, | |
"eval_dim_256_dot_accuracy": 0.12363937529578797, | |
"eval_dim_256_euclidean_accuracy": 0.8764789398958827, | |
"eval_dim_256_manhattan_accuracy": 0.8814481779460482, | |
"eval_dim_256_max_accuracy": 0.8814481779460482, | |
"eval_dim_384_cosine_accuracy": 0.8773071462375769, | |
"eval_dim_384_dot_accuracy": 0.1226928537624231, | |
"eval_dim_384_euclidean_accuracy": 0.8773071462375769, | |
"eval_dim_384_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_384_max_accuracy": 0.8807382867960246, | |
"eval_dim_64_cosine_accuracy": 0.8726928537624231, | |
"eval_dim_64_dot_accuracy": 0.1432796971131093, | |
"eval_dim_64_euclidean_accuracy": 0.869971604353999, | |
"eval_dim_64_manhattan_accuracy": 0.8795551348793185, | |
"eval_dim_64_max_accuracy": 0.8795551348793185, | |
"eval_loss": 16.47365379333496, | |
"eval_runtime": 104.4255, | |
"eval_samples_per_second": 80.938, | |
"eval_sequential_score": 0.8726928537624231, | |
"eval_steps_per_second": 2.538, | |
"step": 3300 | |
}, | |
{ | |
"epoch": 22.549432057214975, | |
"grad_norm": 7.925108432769775, | |
"learning_rate": 1.3331397947420578e-05, | |
"loss": 15.979, | |
"step": 3350 | |
}, | |
{ | |
"epoch": 22.549432057214975, | |
"eval_dim_128_cosine_accuracy": 0.8739943208707998, | |
"eval_dim_128_dot_accuracy": 0.1293185044959773, | |
"eval_dim_128_euclidean_accuracy": 0.8732844297207761, | |
"eval_dim_128_manhattan_accuracy": 0.8782536677709418, | |
"eval_dim_128_max_accuracy": 0.8782536677709418, | |
"eval_dim_256_cosine_accuracy": 0.8770705158542357, | |
"eval_dim_256_dot_accuracy": 0.12328442972077615, | |
"eval_dim_256_euclidean_accuracy": 0.8773071462375769, | |
"eval_dim_256_manhattan_accuracy": 0.8793185044959773, | |
"eval_dim_256_max_accuracy": 0.8793185044959773, | |
"eval_dim_384_cosine_accuracy": 0.8770705158542357, | |
"eval_dim_384_dot_accuracy": 0.12292948414576432, | |
"eval_dim_384_euclidean_accuracy": 0.8770705158542357, | |
"eval_dim_384_manhattan_accuracy": 0.8778987221959299, | |
"eval_dim_384_max_accuracy": 0.8778987221959299, | |
"eval_dim_64_cosine_accuracy": 0.8722195929957407, | |
"eval_dim_64_dot_accuracy": 0.14162328442972077, | |
"eval_dim_64_euclidean_accuracy": 0.8700899195456696, | |
"eval_dim_64_manhattan_accuracy": 0.8767155702792239, | |
"eval_dim_64_max_accuracy": 0.8767155702792239, | |
"eval_loss": 16.468605041503906, | |
"eval_runtime": 101.8518, | |
"eval_samples_per_second": 82.983, | |
"eval_sequential_score": 0.8722195929957407, | |
"eval_steps_per_second": 2.602, | |
"step": 3350 | |
}, | |
{ | |
"epoch": 22.885990744636096, | |
"grad_norm": 6.396854877471924, | |
"learning_rate": 1.3108109370257714e-05, | |
"loss": 15.9622, | |
"step": 3400 | |
}, | |
{ | |
"epoch": 22.885990744636096, | |
"eval_dim_128_cosine_accuracy": 0.8743492664458117, | |
"eval_dim_128_dot_accuracy": 0.13002839564600094, | |
"eval_dim_128_euclidean_accuracy": 0.873639375295788, | |
"eval_dim_128_manhattan_accuracy": 0.8786086133459536, | |
"eval_dim_128_max_accuracy": 0.8786086133459536, | |
"eval_dim_256_cosine_accuracy": 0.8760056791292002, | |
"eval_dim_256_dot_accuracy": 0.12434926644581164, | |
"eval_dim_256_euclidean_accuracy": 0.8757690487458589, | |
"eval_dim_256_manhattan_accuracy": 0.8805016564126834, | |
"eval_dim_256_max_accuracy": 0.8805016564126834, | |
"eval_dim_384_cosine_accuracy": 0.8764789398958827, | |
"eval_dim_384_dot_accuracy": 0.12352106010411737, | |
"eval_dim_384_euclidean_accuracy": 0.8764789398958827, | |
"eval_dim_384_manhattan_accuracy": 0.8807382867960246, | |
"eval_dim_384_max_accuracy": 0.8807382867960246, | |
"eval_dim_64_cosine_accuracy": 0.8721012778040701, | |
"eval_dim_64_dot_accuracy": 0.14351632749645055, | |
"eval_dim_64_euclidean_accuracy": 0.8703265499290109, | |
"eval_dim_64_manhattan_accuracy": 0.8781353525792712, | |
"eval_dim_64_max_accuracy": 0.8781353525792712, | |
"eval_loss": 16.473587036132812, | |
"eval_runtime": 103.4538, | |
"eval_samples_per_second": 81.698, | |
"eval_sequential_score": 0.8721012778040701, | |
"eval_steps_per_second": 2.562, | |
"step": 3400 | |
}, | |
{ | |
"epoch": 23.222549432057214, | |
"grad_norm": 4.757622241973877, | |
"learning_rate": 1.288309190272222e-05, | |
"loss": 15.9881, | |
"step": 3450 | |
}, | |
{ | |
"epoch": 23.222549432057214, | |
"eval_dim_128_cosine_accuracy": 0.8743492664458117, | |
"eval_dim_128_dot_accuracy": 0.13097491717936582, | |
"eval_dim_128_euclidean_accuracy": 0.8737576904874585, | |
"eval_dim_128_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_128_max_accuracy": 0.879081874112636, | |
"eval_dim_256_cosine_accuracy": 0.8756507335541883, | |
"eval_dim_256_dot_accuracy": 0.12588736393752958, | |
"eval_dim_256_euclidean_accuracy": 0.8747042120208235, | |
"eval_dim_256_manhattan_accuracy": 0.8795551348793185, | |
"eval_dim_256_max_accuracy": 0.8795551348793185, | |
"eval_dim_384_cosine_accuracy": 0.8755324183625177, | |
"eval_dim_384_dot_accuracy": 0.12446758163748226, | |
"eval_dim_384_euclidean_accuracy": 0.8755324183625177, | |
"eval_dim_384_manhattan_accuracy": 0.879081874112636, | |
"eval_dim_384_max_accuracy": 0.879081874112636, | |
"eval_dim_64_cosine_accuracy": 0.8723379081874113, | |
"eval_dim_64_dot_accuracy": 0.14375295787979175, | |
"eval_dim_64_euclidean_accuracy": 0.8700899195456696, | |
"eval_dim_64_manhattan_accuracy": 0.8788452437292948, | |
"eval_dim_64_max_accuracy": 0.8788452437292948, | |
"eval_loss": 16.48019790649414, | |
"eval_runtime": 104.0826, | |
"eval_samples_per_second": 81.205, | |
"eval_sequential_score": 0.8723379081874113, | |
"eval_steps_per_second": 2.546, | |
"step": 3450 | |
}, | |
{ | |
"epoch": 23.559108119478335, | |
"grad_norm": 5.279081344604492, | |
"learning_rate": 1.2656470711108763e-05, | |
"loss": 15.9482, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 23.559108119478335, | |
"eval_dim_128_cosine_accuracy": 0.8724562233790819, | |
"eval_dim_128_dot_accuracy": 0.13073828679602462, | |
"eval_dim_128_euclidean_accuracy": 0.8728111689540937, | |
"eval_dim_128_manhattan_accuracy": 0.8783719829626124, | |
"eval_dim_128_max_accuracy": 0.8783719829626124, | |
"eval_dim_256_cosine_accuracy": 0.8761239943208708, | |
"eval_dim_256_dot_accuracy": 0.1250591575958353, | |
"eval_dim_256_euclidean_accuracy": 0.8761239943208708, | |
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, | |
"eval_dim_256_max_accuracy": 0.8797917652626597, | |
"eval_dim_384_cosine_accuracy": 0.8761239943208708, | |
"eval_dim_384_dot_accuracy": 0.1238760056791292, | |
"eval_dim_384_euclidean_accuracy": 0.8761239943208708, | |
"eval_dim_384_manhattan_accuracy": 0.8770705158542357, | |
"eval_dim_384_max_accuracy": 0.8770705158542357, | |
"eval_dim_64_cosine_accuracy": 0.8710364410790346, | |
"eval_dim_64_dot_accuracy": 0.143989588263133, | |
"eval_dim_64_euclidean_accuracy": 0.867841930903928, | |
"eval_dim_64_manhattan_accuracy": 0.8764789398958827, | |
"eval_dim_64_max_accuracy": 0.8764789398958827, | |
"eval_loss": 16.482074737548828, | |
"eval_runtime": 102.3602, | |
"eval_samples_per_second": 82.571, | |
"eval_sequential_score": 0.8710364410790346, | |
"eval_steps_per_second": 2.589, | |
"step": 3500 | |
} | |
], | |
"logging_steps": 50, | |
"max_steps": 7400, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 50, | |
"save_steps": 500, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 0.0, | |
"train_batch_size": 32, | |
"trial_name": null, | |
"trial_params": null | |
} | |