|
{ |
|
"best_metric": 0.8931613819214387, |
|
"best_model_checkpoint": "bge-small-hotpotwa-matryoshka-fine-tuned-50/checkpoint-500", |
|
"epoch": 26.924694993689524, |
|
"eval_steps": 50, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.33655868742111905, |
|
"grad_norm": 1.7359095811843872, |
|
"learning_rate": 1.3513513513513515e-06, |
|
"loss": 19.5758, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33655868742111905, |
|
"eval_dim_128_cosine_accuracy": 0.9551585423568386, |
|
"eval_dim_128_dot_accuracy": 0.08980123047799338, |
|
"eval_dim_128_euclidean_accuracy": 0.9530288689067676, |
|
"eval_dim_128_manhattan_accuracy": 0.9527922385234264, |
|
"eval_dim_128_max_accuracy": 0.9551585423568386, |
|
"eval_dim_256_cosine_accuracy": 0.966280170373876, |
|
"eval_dim_256_dot_accuracy": 0.042711784193090394, |
|
"eval_dim_256_euclidean_accuracy": 0.9659252247988642, |
|
"eval_dim_256_manhattan_accuracy": 0.9634406057737813, |
|
"eval_dim_256_max_accuracy": 0.966280170373876, |
|
"eval_dim_384_cosine_accuracy": 0.9667534311405585, |
|
"eval_dim_384_dot_accuracy": 0.03324656885944155, |
|
"eval_dim_384_euclidean_accuracy": 0.9667534311405585, |
|
"eval_dim_384_manhattan_accuracy": 0.9669900615238997, |
|
"eval_dim_384_max_accuracy": 0.9669900615238997, |
|
"eval_dim_64_cosine_accuracy": 0.9358731661145291, |
|
"eval_dim_64_dot_accuracy": 0.1320397539044013, |
|
"eval_dim_64_euclidean_accuracy": 0.9345716990061524, |
|
"eval_dim_64_manhattan_accuracy": 0.9269995267392334, |
|
"eval_dim_64_max_accuracy": 0.9358731661145291, |
|
"eval_loss": 19.393272399902344, |
|
"eval_runtime": 104.7788, |
|
"eval_samples_per_second": 80.665, |
|
"eval_sequential_score": 0.9358731661145291, |
|
"eval_steps_per_second": 2.529, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6731173748422381, |
|
"grad_norm": 1.976278305053711, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 19.4573, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6731173748422381, |
|
"eval_dim_128_cosine_accuracy": 0.9570515854235684, |
|
"eval_dim_128_dot_accuracy": 0.06625650733554188, |
|
"eval_dim_128_euclidean_accuracy": 0.9589446284902982, |
|
"eval_dim_128_manhattan_accuracy": 0.9557501183151916, |
|
"eval_dim_128_max_accuracy": 0.9589446284902982, |
|
"eval_dim_256_cosine_accuracy": 0.9646237576904875, |
|
"eval_dim_256_dot_accuracy": 0.04046379555134879, |
|
"eval_dim_256_euclidean_accuracy": 0.9650970184571699, |
|
"eval_dim_256_manhattan_accuracy": 0.9632039753904401, |
|
"eval_dim_256_max_accuracy": 0.9650970184571699, |
|
"eval_dim_384_cosine_accuracy": 0.9653336488405111, |
|
"eval_dim_384_dot_accuracy": 0.03466635115948888, |
|
"eval_dim_384_euclidean_accuracy": 0.9653336488405111, |
|
"eval_dim_384_manhattan_accuracy": 0.9646237576904875, |
|
"eval_dim_384_max_accuracy": 0.9653336488405111, |
|
"eval_dim_64_cosine_accuracy": 0.9449834358731661, |
|
"eval_dim_64_dot_accuracy": 0.08932796971131093, |
|
"eval_dim_64_euclidean_accuracy": 0.9461665877898722, |
|
"eval_dim_64_manhattan_accuracy": 0.9420255560814008, |
|
"eval_dim_64_max_accuracy": 0.9461665877898722, |
|
"eval_loss": 19.097097396850586, |
|
"eval_runtime": 103.9699, |
|
"eval_samples_per_second": 81.293, |
|
"eval_sequential_score": 0.9449834358731661, |
|
"eval_steps_per_second": 2.549, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0096760622633572, |
|
"grad_norm": 2.1209616661071777, |
|
"learning_rate": 4.0540540540540545e-06, |
|
"loss": 19.1409, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0096760622633572, |
|
"eval_dim_128_cosine_accuracy": 0.9384761003312825, |
|
"eval_dim_128_dot_accuracy": 0.06897775674396593, |
|
"eval_dim_128_euclidean_accuracy": 0.9421438712730714, |
|
"eval_dim_128_manhattan_accuracy": 0.939540937056318, |
|
"eval_dim_128_max_accuracy": 0.9421438712730714, |
|
"eval_dim_256_cosine_accuracy": 0.9434453383814482, |
|
"eval_dim_256_dot_accuracy": 0.05797444391859915, |
|
"eval_dim_256_euclidean_accuracy": 0.9436819687647894, |
|
"eval_dim_256_manhattan_accuracy": 0.9423805016564126, |
|
"eval_dim_256_max_accuracy": 0.9436819687647894, |
|
"eval_dim_384_cosine_accuracy": 0.9473497397065783, |
|
"eval_dim_384_dot_accuracy": 0.05265026029342167, |
|
"eval_dim_384_euclidean_accuracy": 0.9473497397065783, |
|
"eval_dim_384_manhattan_accuracy": 0.9458116422148604, |
|
"eval_dim_384_max_accuracy": 0.9473497397065783, |
|
"eval_dim_64_cosine_accuracy": 0.9306672976810223, |
|
"eval_dim_64_dot_accuracy": 0.07749645054424988, |
|
"eval_dim_64_euclidean_accuracy": 0.9332702318977757, |
|
"eval_dim_64_manhattan_accuracy": 0.9320870799810695, |
|
"eval_dim_64_max_accuracy": 0.9332702318977757, |
|
"eval_loss": 18.4069766998291, |
|
"eval_runtime": 103.2125, |
|
"eval_samples_per_second": 81.889, |
|
"eval_sequential_score": 0.9306672976810223, |
|
"eval_steps_per_second": 2.568, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.3462347496844762, |
|
"grad_norm": 1.658170461654663, |
|
"learning_rate": 5.405405405405406e-06, |
|
"loss": 18.6431, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3462347496844762, |
|
"eval_dim_128_cosine_accuracy": 0.9125650733554188, |
|
"eval_dim_128_dot_accuracy": 0.08826313298627544, |
|
"eval_dim_128_euclidean_accuracy": 0.9139848556554662, |
|
"eval_dim_128_manhattan_accuracy": 0.9145764316138192, |
|
"eval_dim_128_max_accuracy": 0.9145764316138192, |
|
"eval_dim_256_cosine_accuracy": 0.9163511594888784, |
|
"eval_dim_256_dot_accuracy": 0.08613345953620445, |
|
"eval_dim_256_euclidean_accuracy": 0.9163511594888784, |
|
"eval_dim_256_manhattan_accuracy": 0.9151680075721723, |
|
"eval_dim_256_max_accuracy": 0.9163511594888784, |
|
"eval_dim_384_cosine_accuracy": 0.9183625177472787, |
|
"eval_dim_384_dot_accuracy": 0.08163748225272124, |
|
"eval_dim_384_euclidean_accuracy": 0.9183625177472787, |
|
"eval_dim_384_manhattan_accuracy": 0.9184808329389493, |
|
"eval_dim_384_max_accuracy": 0.9184808329389493, |
|
"eval_dim_64_cosine_accuracy": 0.9093705631803124, |
|
"eval_dim_64_dot_accuracy": 0.09477046852815901, |
|
"eval_dim_64_euclidean_accuracy": 0.9126833885470894, |
|
"eval_dim_64_manhattan_accuracy": 0.9113819214387128, |
|
"eval_dim_64_max_accuracy": 0.9126833885470894, |
|
"eval_loss": 17.32919692993164, |
|
"eval_runtime": 102.8811, |
|
"eval_samples_per_second": 82.153, |
|
"eval_sequential_score": 0.9093705631803124, |
|
"eval_steps_per_second": 2.576, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6827934371055953, |
|
"grad_norm": 1.5389924049377441, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"loss": 18.2288, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.6827934371055953, |
|
"eval_dim_128_cosine_accuracy": 0.9062943681968765, |
|
"eval_dim_128_dot_accuracy": 0.09311405584477046, |
|
"eval_dim_128_euclidean_accuracy": 0.9062943681968765, |
|
"eval_dim_128_manhattan_accuracy": 0.9062943681968765, |
|
"eval_dim_128_max_accuracy": 0.9062943681968765, |
|
"eval_dim_256_cosine_accuracy": 0.9071225745385707, |
|
"eval_dim_256_dot_accuracy": 0.09335068622811168, |
|
"eval_dim_256_euclidean_accuracy": 0.907950780880265, |
|
"eval_dim_256_manhattan_accuracy": 0.9093705631803124, |
|
"eval_dim_256_max_accuracy": 0.9093705631803124, |
|
"eval_dim_384_cosine_accuracy": 0.9099621391386654, |
|
"eval_dim_384_dot_accuracy": 0.0900378608613346, |
|
"eval_dim_384_euclidean_accuracy": 0.9099621391386654, |
|
"eval_dim_384_manhattan_accuracy": 0.9087789872219593, |
|
"eval_dim_384_max_accuracy": 0.9099621391386654, |
|
"eval_dim_64_cosine_accuracy": 0.9022716516800757, |
|
"eval_dim_64_dot_accuracy": 0.09962139138665405, |
|
"eval_dim_64_euclidean_accuracy": 0.9046379555134879, |
|
"eval_dim_64_manhattan_accuracy": 0.9040463795551349, |
|
"eval_dim_64_max_accuracy": 0.9046379555134879, |
|
"eval_loss": 16.875099182128906, |
|
"eval_runtime": 104.7249, |
|
"eval_samples_per_second": 80.707, |
|
"eval_sequential_score": 0.9022716516800757, |
|
"eval_steps_per_second": 2.53, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0193521245267143, |
|
"grad_norm": 1.4371246099472046, |
|
"learning_rate": 8.108108108108109e-06, |
|
"loss": 18.0425, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0193521245267143, |
|
"eval_dim_128_cosine_accuracy": 0.9020350212967345, |
|
"eval_dim_128_dot_accuracy": 0.09772834831992427, |
|
"eval_dim_128_euclidean_accuracy": 0.9035731187884525, |
|
"eval_dim_128_manhattan_accuracy": 0.9044013251301467, |
|
"eval_dim_128_max_accuracy": 0.9044013251301467, |
|
"eval_dim_256_cosine_accuracy": 0.9032181732134406, |
|
"eval_dim_256_dot_accuracy": 0.09690014197823, |
|
"eval_dim_256_euclidean_accuracy": 0.90309985802177, |
|
"eval_dim_256_manhattan_accuracy": 0.9042830099384761, |
|
"eval_dim_256_max_accuracy": 0.9042830099384761, |
|
"eval_dim_384_cosine_accuracy": 0.9045196403218173, |
|
"eval_dim_384_dot_accuracy": 0.09548035967818268, |
|
"eval_dim_384_euclidean_accuracy": 0.9045196403218173, |
|
"eval_dim_384_manhattan_accuracy": 0.9049929010884997, |
|
"eval_dim_384_max_accuracy": 0.9049929010884997, |
|
"eval_dim_64_cosine_accuracy": 0.8989588263132986, |
|
"eval_dim_64_dot_accuracy": 0.10234264079507809, |
|
"eval_dim_64_euclidean_accuracy": 0.9016800757217227, |
|
"eval_dim_64_manhattan_accuracy": 0.9016800757217227, |
|
"eval_dim_64_max_accuracy": 0.9016800757217227, |
|
"eval_loss": 16.69808578491211, |
|
"eval_runtime": 103.4615, |
|
"eval_samples_per_second": 81.692, |
|
"eval_sequential_score": 0.8989588263132986, |
|
"eval_steps_per_second": 2.561, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.3559108119478336, |
|
"grad_norm": 1.386720895767212, |
|
"learning_rate": 9.45945945945946e-06, |
|
"loss": 17.9458, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.3559108119478336, |
|
"eval_dim_128_cosine_accuracy": 0.9036914339801231, |
|
"eval_dim_128_dot_accuracy": 0.09761003312825367, |
|
"eval_dim_128_euclidean_accuracy": 0.9034548035967819, |
|
"eval_dim_128_manhattan_accuracy": 0.9016800757217227, |
|
"eval_dim_128_max_accuracy": 0.9036914339801231, |
|
"eval_dim_256_cosine_accuracy": 0.9013251301467108, |
|
"eval_dim_256_dot_accuracy": 0.09855655466161856, |
|
"eval_dim_256_euclidean_accuracy": 0.9015617605300521, |
|
"eval_dim_256_manhattan_accuracy": 0.9022716516800757, |
|
"eval_dim_256_max_accuracy": 0.9022716516800757, |
|
"eval_dim_384_cosine_accuracy": 0.9021533364884051, |
|
"eval_dim_384_dot_accuracy": 0.09784666351159489, |
|
"eval_dim_384_euclidean_accuracy": 0.9021533364884051, |
|
"eval_dim_384_manhattan_accuracy": 0.9039280643634643, |
|
"eval_dim_384_max_accuracy": 0.9039280643634643, |
|
"eval_dim_64_cosine_accuracy": 0.8983672503549456, |
|
"eval_dim_64_dot_accuracy": 0.10352579271178419, |
|
"eval_dim_64_euclidean_accuracy": 0.8995504022716517, |
|
"eval_dim_64_manhattan_accuracy": 0.8981306199716044, |
|
"eval_dim_64_max_accuracy": 0.8995504022716517, |
|
"eval_loss": 16.615509033203125, |
|
"eval_runtime": 103.1308, |
|
"eval_samples_per_second": 81.954, |
|
"eval_sequential_score": 0.8983672503549456, |
|
"eval_steps_per_second": 2.57, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.6924694993689524, |
|
"grad_norm": 1.4882862567901611, |
|
"learning_rate": 1.0810810810810812e-05, |
|
"loss": 17.8525, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.6924694993689524, |
|
"eval_dim_128_cosine_accuracy": 0.8977756743965926, |
|
"eval_dim_128_dot_accuracy": 0.10269758637008992, |
|
"eval_dim_128_euclidean_accuracy": 0.9006152389966872, |
|
"eval_dim_128_manhattan_accuracy": 0.900378608613346, |
|
"eval_dim_128_max_accuracy": 0.9006152389966872, |
|
"eval_dim_256_cosine_accuracy": 0.8970657832465688, |
|
"eval_dim_256_dot_accuracy": 0.10269758637008992, |
|
"eval_dim_256_euclidean_accuracy": 0.8980123047799338, |
|
"eval_dim_256_manhattan_accuracy": 0.8971840984382394, |
|
"eval_dim_256_max_accuracy": 0.8980123047799338, |
|
"eval_dim_384_cosine_accuracy": 0.8974207288215806, |
|
"eval_dim_384_dot_accuracy": 0.1025792711784193, |
|
"eval_dim_384_euclidean_accuracy": 0.8974207288215806, |
|
"eval_dim_384_manhattan_accuracy": 0.898248935163275, |
|
"eval_dim_384_max_accuracy": 0.898248935163275, |
|
"eval_dim_64_cosine_accuracy": 0.8948177946048272, |
|
"eval_dim_64_dot_accuracy": 0.10636535731187885, |
|
"eval_dim_64_euclidean_accuracy": 0.8969474680548982, |
|
"eval_dim_64_manhattan_accuracy": 0.8948177946048272, |
|
"eval_dim_64_max_accuracy": 0.8969474680548982, |
|
"eval_loss": 16.553625106811523, |
|
"eval_runtime": 103.3808, |
|
"eval_samples_per_second": 81.756, |
|
"eval_sequential_score": 0.8948177946048272, |
|
"eval_steps_per_second": 2.563, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.0290281867900717, |
|
"grad_norm": 1.5986053943634033, |
|
"learning_rate": 1.2162162162162164e-05, |
|
"loss": 17.7529, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.0290281867900717, |
|
"eval_dim_128_cosine_accuracy": 0.8980123047799338, |
|
"eval_dim_128_dot_accuracy": 0.10340747752011359, |
|
"eval_dim_128_euclidean_accuracy": 0.8997870326549929, |
|
"eval_dim_128_manhattan_accuracy": 0.8996687174633223, |
|
"eval_dim_128_max_accuracy": 0.8997870326549929, |
|
"eval_dim_256_cosine_accuracy": 0.8956460009465216, |
|
"eval_dim_256_dot_accuracy": 0.10399905347846664, |
|
"eval_dim_256_euclidean_accuracy": 0.8970657832465688, |
|
"eval_dim_256_manhattan_accuracy": 0.8960009465215334, |
|
"eval_dim_256_max_accuracy": 0.8970657832465688, |
|
"eval_dim_384_cosine_accuracy": 0.8952910553715097, |
|
"eval_dim_384_dot_accuracy": 0.1047089446284903, |
|
"eval_dim_384_euclidean_accuracy": 0.8952910553715097, |
|
"eval_dim_384_manhattan_accuracy": 0.8971840984382394, |
|
"eval_dim_384_max_accuracy": 0.8971840984382394, |
|
"eval_dim_64_cosine_accuracy": 0.8950544249881685, |
|
"eval_dim_64_dot_accuracy": 0.10541883577851396, |
|
"eval_dim_64_euclidean_accuracy": 0.8969474680548982, |
|
"eval_dim_64_manhattan_accuracy": 0.8948177946048272, |
|
"eval_dim_64_max_accuracy": 0.8969474680548982, |
|
"eval_loss": 16.51355743408203, |
|
"eval_runtime": 104.654, |
|
"eval_samples_per_second": 80.761, |
|
"eval_sequential_score": 0.8950544249881685, |
|
"eval_steps_per_second": 2.532, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.3655868742111905, |
|
"grad_norm": 1.8756661415100098, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 17.6709, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.3655868742111905, |
|
"eval_dim_128_cosine_accuracy": 0.8931613819214387, |
|
"eval_dim_128_dot_accuracy": 0.10766682442025556, |
|
"eval_dim_128_euclidean_accuracy": 0.8944628490298154, |
|
"eval_dim_128_manhattan_accuracy": 0.8942262186464742, |
|
"eval_dim_128_max_accuracy": 0.8944628490298154, |
|
"eval_dim_256_cosine_accuracy": 0.8913866540463795, |
|
"eval_dim_256_dot_accuracy": 0.10896829152863227, |
|
"eval_dim_256_euclidean_accuracy": 0.8937529578797918, |
|
"eval_dim_256_manhattan_accuracy": 0.8937529578797918, |
|
"eval_dim_256_max_accuracy": 0.8937529578797918, |
|
"eval_dim_384_cosine_accuracy": 0.8928064363464269, |
|
"eval_dim_384_dot_accuracy": 0.10719356365357312, |
|
"eval_dim_384_euclidean_accuracy": 0.8928064363464269, |
|
"eval_dim_384_manhattan_accuracy": 0.8932796971131093, |
|
"eval_dim_384_max_accuracy": 0.8932796971131093, |
|
"eval_dim_64_cosine_accuracy": 0.8906767628963559, |
|
"eval_dim_64_dot_accuracy": 0.11121628017037388, |
|
"eval_dim_64_euclidean_accuracy": 0.8911500236630383, |
|
"eval_dim_64_manhattan_accuracy": 0.8893752957879791, |
|
"eval_dim_64_max_accuracy": 0.8911500236630383, |
|
"eval_loss": 16.4824161529541, |
|
"eval_runtime": 103.2754, |
|
"eval_samples_per_second": 81.839, |
|
"eval_sequential_score": 0.8906767628963559, |
|
"eval_steps_per_second": 2.566, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.70214556163231, |
|
"grad_norm": 2.3590304851531982, |
|
"learning_rate": 1.4864864864864865e-05, |
|
"loss": 17.5348, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.70214556163231, |
|
"eval_dim_128_cosine_accuracy": 0.8862991008045433, |
|
"eval_dim_128_dot_accuracy": 0.11500236630383341, |
|
"eval_dim_128_euclidean_accuracy": 0.8864174159962139, |
|
"eval_dim_128_manhattan_accuracy": 0.8858258400378609, |
|
"eval_dim_128_max_accuracy": 0.8864174159962139, |
|
"eval_dim_256_cosine_accuracy": 0.8858258400378609, |
|
"eval_dim_256_dot_accuracy": 0.11358258400378608, |
|
"eval_dim_256_euclidean_accuracy": 0.8867723615712257, |
|
"eval_dim_256_manhattan_accuracy": 0.8858258400378609, |
|
"eval_dim_256_max_accuracy": 0.8867723615712257, |
|
"eval_dim_384_cosine_accuracy": 0.8859441552295315, |
|
"eval_dim_384_dot_accuracy": 0.11405584477046853, |
|
"eval_dim_384_euclidean_accuracy": 0.8859441552295315, |
|
"eval_dim_384_manhattan_accuracy": 0.88760056791292, |
|
"eval_dim_384_max_accuracy": 0.88760056791292, |
|
"eval_dim_64_cosine_accuracy": 0.884879318504496, |
|
"eval_dim_64_dot_accuracy": 0.11985328916232844, |
|
"eval_dim_64_euclidean_accuracy": 0.8845243729294842, |
|
"eval_dim_64_manhattan_accuracy": 0.8828679602460956, |
|
"eval_dim_64_max_accuracy": 0.884879318504496, |
|
"eval_loss": 16.463218688964844, |
|
"eval_runtime": 103.2788, |
|
"eval_samples_per_second": 81.837, |
|
"eval_sequential_score": 0.884879318504496, |
|
"eval_steps_per_second": 2.566, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.038704249053429, |
|
"grad_norm": 2.6120336055755615, |
|
"learning_rate": 1.6216216216216218e-05, |
|
"loss": 17.4198, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.038704249053429, |
|
"eval_dim_128_cosine_accuracy": 0.8852342640795078, |
|
"eval_dim_128_dot_accuracy": 0.11748698532891623, |
|
"eval_dim_128_euclidean_accuracy": 0.8846426881211548, |
|
"eval_dim_128_manhattan_accuracy": 0.8859441552295315, |
|
"eval_dim_128_max_accuracy": 0.8859441552295315, |
|
"eval_dim_256_cosine_accuracy": 0.8861807856128727, |
|
"eval_dim_256_dot_accuracy": 0.1137008991954567, |
|
"eval_dim_256_euclidean_accuracy": 0.8871273071462376, |
|
"eval_dim_256_manhattan_accuracy": 0.8866540463795551, |
|
"eval_dim_256_max_accuracy": 0.8871273071462376, |
|
"eval_dim_384_cosine_accuracy": 0.8859441552295315, |
|
"eval_dim_384_dot_accuracy": 0.11405584477046853, |
|
"eval_dim_384_euclidean_accuracy": 0.8859441552295315, |
|
"eval_dim_384_manhattan_accuracy": 0.8847610033128254, |
|
"eval_dim_384_max_accuracy": 0.8859441552295315, |
|
"eval_dim_64_cosine_accuracy": 0.8839327969711311, |
|
"eval_dim_64_dot_accuracy": 0.12103644107903455, |
|
"eval_dim_64_euclidean_accuracy": 0.8861807856128727, |
|
"eval_dim_64_manhattan_accuracy": 0.8857075248461902, |
|
"eval_dim_64_max_accuracy": 0.8861807856128727, |
|
"eval_loss": 16.46009063720703, |
|
"eval_runtime": 104.1113, |
|
"eval_samples_per_second": 81.182, |
|
"eval_sequential_score": 0.8839327969711311, |
|
"eval_steps_per_second": 2.545, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.375262936474548, |
|
"grad_norm": 2.63383412361145, |
|
"learning_rate": 1.756756756756757e-05, |
|
"loss": 17.3673, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.375262936474548, |
|
"eval_dim_128_cosine_accuracy": 0.8853525792711784, |
|
"eval_dim_128_dot_accuracy": 0.1160672030288689, |
|
"eval_dim_128_euclidean_accuracy": 0.8867723615712257, |
|
"eval_dim_128_manhattan_accuracy": 0.8855892096545196, |
|
"eval_dim_128_max_accuracy": 0.8867723615712257, |
|
"eval_dim_256_cosine_accuracy": 0.8864174159962139, |
|
"eval_dim_256_dot_accuracy": 0.11417415996213914, |
|
"eval_dim_256_euclidean_accuracy": 0.8871273071462376, |
|
"eval_dim_256_manhattan_accuracy": 0.8862991008045433, |
|
"eval_dim_256_max_accuracy": 0.8871273071462376, |
|
"eval_dim_384_cosine_accuracy": 0.8865357311878845, |
|
"eval_dim_384_dot_accuracy": 0.11346426881211548, |
|
"eval_dim_384_euclidean_accuracy": 0.8865357311878845, |
|
"eval_dim_384_manhattan_accuracy": 0.8861807856128727, |
|
"eval_dim_384_max_accuracy": 0.8865357311878845, |
|
"eval_dim_64_cosine_accuracy": 0.8841694273544723, |
|
"eval_dim_64_dot_accuracy": 0.12091812588736393, |
|
"eval_dim_64_euclidean_accuracy": 0.883341221012778, |
|
"eval_dim_64_manhattan_accuracy": 0.8828679602460956, |
|
"eval_dim_64_max_accuracy": 0.8841694273544723, |
|
"eval_loss": 16.440513610839844, |
|
"eval_runtime": 102.5958, |
|
"eval_samples_per_second": 82.382, |
|
"eval_sequential_score": 0.8841694273544723, |
|
"eval_steps_per_second": 2.583, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.711821623895667, |
|
"grad_norm": 3.044569730758667, |
|
"learning_rate": 1.891891891891892e-05, |
|
"loss": 17.2603, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.711821623895667, |
|
"eval_dim_128_cosine_accuracy": 0.8834595362044486, |
|
"eval_dim_128_dot_accuracy": 0.11772361571225745, |
|
"eval_dim_128_euclidean_accuracy": 0.8835778513961192, |
|
"eval_dim_128_manhattan_accuracy": 0.8840511121628017, |
|
"eval_dim_128_max_accuracy": 0.8840511121628017, |
|
"eval_dim_256_cosine_accuracy": 0.8838144817794605, |
|
"eval_dim_256_dot_accuracy": 0.11571225745385708, |
|
"eval_dim_256_euclidean_accuracy": 0.8838144817794605, |
|
"eval_dim_256_manhattan_accuracy": 0.8839327969711311, |
|
"eval_dim_256_max_accuracy": 0.8839327969711311, |
|
"eval_dim_384_cosine_accuracy": 0.8838144817794605, |
|
"eval_dim_384_dot_accuracy": 0.11618551822053952, |
|
"eval_dim_384_euclidean_accuracy": 0.8838144817794605, |
|
"eval_dim_384_manhattan_accuracy": 0.8847610033128254, |
|
"eval_dim_384_max_accuracy": 0.8847610033128254, |
|
"eval_dim_64_cosine_accuracy": 0.8807382867960246, |
|
"eval_dim_64_dot_accuracy": 0.12328442972077615, |
|
"eval_dim_64_euclidean_accuracy": 0.8814481779460482, |
|
"eval_dim_64_manhattan_accuracy": 0.8810932323710364, |
|
"eval_dim_64_max_accuracy": 0.8814481779460482, |
|
"eval_loss": 16.435609817504883, |
|
"eval_runtime": 103.6437, |
|
"eval_samples_per_second": 81.549, |
|
"eval_sequential_score": 0.8807382867960246, |
|
"eval_steps_per_second": 2.557, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.0483803113167856, |
|
"grad_norm": 3.3264880180358887, |
|
"learning_rate": 1.9999888744757143e-05, |
|
"loss": 17.1807, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.0483803113167856, |
|
"eval_dim_128_cosine_accuracy": 0.8849976336961666, |
|
"eval_dim_128_dot_accuracy": 0.11654046379555134, |
|
"eval_dim_128_euclidean_accuracy": 0.884879318504496, |
|
"eval_dim_128_manhattan_accuracy": 0.8838144817794605, |
|
"eval_dim_128_max_accuracy": 0.8849976336961666, |
|
"eval_dim_256_cosine_accuracy": 0.8864174159962139, |
|
"eval_dim_256_dot_accuracy": 0.11417415996213914, |
|
"eval_dim_256_euclidean_accuracy": 0.8852342640795078, |
|
"eval_dim_256_manhattan_accuracy": 0.8857075248461902, |
|
"eval_dim_256_max_accuracy": 0.8864174159962139, |
|
"eval_dim_384_cosine_accuracy": 0.8859441552295315, |
|
"eval_dim_384_dot_accuracy": 0.11405584477046853, |
|
"eval_dim_384_euclidean_accuracy": 0.8859441552295315, |
|
"eval_dim_384_manhattan_accuracy": 0.8855892096545196, |
|
"eval_dim_384_max_accuracy": 0.8859441552295315, |
|
"eval_dim_64_cosine_accuracy": 0.8838144817794605, |
|
"eval_dim_64_dot_accuracy": 0.12079981069569333, |
|
"eval_dim_64_euclidean_accuracy": 0.8844060577378136, |
|
"eval_dim_64_manhattan_accuracy": 0.8834595362044486, |
|
"eval_dim_64_max_accuracy": 0.8844060577378136, |
|
"eval_loss": 16.444347381591797, |
|
"eval_runtime": 103.5226, |
|
"eval_samples_per_second": 81.644, |
|
"eval_sequential_score": 0.8838144817794605, |
|
"eval_steps_per_second": 2.56, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.384938998737905, |
|
"grad_norm": 2.7032034397125244, |
|
"learning_rate": 1.999599507118322e-05, |
|
"loss": 17.1629, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.384938998737905, |
|
"eval_dim_128_cosine_accuracy": 0.8847610033128254, |
|
"eval_dim_128_dot_accuracy": 0.11701372456223379, |
|
"eval_dim_128_euclidean_accuracy": 0.8859441552295315, |
|
"eval_dim_128_manhattan_accuracy": 0.884879318504496, |
|
"eval_dim_128_max_accuracy": 0.8859441552295315, |
|
"eval_dim_256_cosine_accuracy": 0.8861807856128727, |
|
"eval_dim_256_dot_accuracy": 0.11417415996213914, |
|
"eval_dim_256_euclidean_accuracy": 0.8859441552295315, |
|
"eval_dim_256_manhattan_accuracy": 0.8853525792711784, |
|
"eval_dim_256_max_accuracy": 0.8861807856128727, |
|
"eval_dim_384_cosine_accuracy": 0.8866540463795551, |
|
"eval_dim_384_dot_accuracy": 0.11334595362044486, |
|
"eval_dim_384_euclidean_accuracy": 0.8866540463795551, |
|
"eval_dim_384_manhattan_accuracy": 0.8862991008045433, |
|
"eval_dim_384_max_accuracy": 0.8866540463795551, |
|
"eval_dim_64_cosine_accuracy": 0.8841694273544723, |
|
"eval_dim_64_dot_accuracy": 0.11831519167061051, |
|
"eval_dim_64_euclidean_accuracy": 0.8841694273544723, |
|
"eval_dim_64_manhattan_accuracy": 0.8839327969711311, |
|
"eval_dim_64_max_accuracy": 0.8841694273544723, |
|
"eval_loss": 16.420166015625, |
|
"eval_runtime": 103.5297, |
|
"eval_samples_per_second": 81.638, |
|
"eval_sequential_score": 0.8841694273544723, |
|
"eval_steps_per_second": 2.56, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.721497686159024, |
|
"grad_norm": 3.8163998126983643, |
|
"learning_rate": 1.9986541110764565e-05, |
|
"loss": 17.0747, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 5.721497686159024, |
|
"eval_dim_128_cosine_accuracy": 0.8853525792711784, |
|
"eval_dim_128_dot_accuracy": 0.11618551822053952, |
|
"eval_dim_128_euclidean_accuracy": 0.8835778513961192, |
|
"eval_dim_128_manhattan_accuracy": 0.8845243729294842, |
|
"eval_dim_128_max_accuracy": 0.8853525792711784, |
|
"eval_dim_256_cosine_accuracy": 0.8874822527212494, |
|
"eval_dim_256_dot_accuracy": 0.11358258400378608, |
|
"eval_dim_256_euclidean_accuracy": 0.8864174159962139, |
|
"eval_dim_256_manhattan_accuracy": 0.8862991008045433, |
|
"eval_dim_256_max_accuracy": 0.8874822527212494, |
|
"eval_dim_384_cosine_accuracy": 0.8868906767628963, |
|
"eval_dim_384_dot_accuracy": 0.11310932323710364, |
|
"eval_dim_384_euclidean_accuracy": 0.8868906767628963, |
|
"eval_dim_384_manhattan_accuracy": 0.8862991008045433, |
|
"eval_dim_384_max_accuracy": 0.8868906767628963, |
|
"eval_dim_64_cosine_accuracy": 0.8836961665877898, |
|
"eval_dim_64_dot_accuracy": 0.11867013724562234, |
|
"eval_dim_64_euclidean_accuracy": 0.8831045906294368, |
|
"eval_dim_64_manhattan_accuracy": 0.8832229058211074, |
|
"eval_dim_64_max_accuracy": 0.8836961665877898, |
|
"eval_loss": 16.416208267211914, |
|
"eval_runtime": 103.4694, |
|
"eval_samples_per_second": 81.686, |
|
"eval_sequential_score": 0.8836961665877898, |
|
"eval_steps_per_second": 2.561, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 6.058056373580143, |
|
"grad_norm": 3.9848620891571045, |
|
"learning_rate": 1.9971532122280466e-05, |
|
"loss": 17.0161, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.058056373580143, |
|
"eval_dim_128_cosine_accuracy": 0.8852342640795078, |
|
"eval_dim_128_dot_accuracy": 0.11618551822053952, |
|
"eval_dim_128_euclidean_accuracy": 0.8852342640795078, |
|
"eval_dim_128_manhattan_accuracy": 0.8846426881211548, |
|
"eval_dim_128_max_accuracy": 0.8852342640795078, |
|
"eval_dim_256_cosine_accuracy": 0.8862991008045433, |
|
"eval_dim_256_dot_accuracy": 0.11417415996213914, |
|
"eval_dim_256_euclidean_accuracy": 0.8858258400378609, |
|
"eval_dim_256_manhattan_accuracy": 0.8853525792711784, |
|
"eval_dim_256_max_accuracy": 0.8862991008045433, |
|
"eval_dim_384_cosine_accuracy": 0.8855892096545196, |
|
"eval_dim_384_dot_accuracy": 0.11441079034548036, |
|
"eval_dim_384_euclidean_accuracy": 0.8855892096545196, |
|
"eval_dim_384_manhattan_accuracy": 0.885470894462849, |
|
"eval_dim_384_max_accuracy": 0.8855892096545196, |
|
"eval_dim_64_cosine_accuracy": 0.8855892096545196, |
|
"eval_dim_64_dot_accuracy": 0.11831519167061051, |
|
"eval_dim_64_euclidean_accuracy": 0.885470894462849, |
|
"eval_dim_64_manhattan_accuracy": 0.8834595362044486, |
|
"eval_dim_64_max_accuracy": 0.8855892096545196, |
|
"eval_loss": 16.419212341308594, |
|
"eval_runtime": 104.3001, |
|
"eval_samples_per_second": 81.035, |
|
"eval_sequential_score": 0.8855892096545196, |
|
"eval_steps_per_second": 2.541, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.394615061001262, |
|
"grad_norm": 4.083323001861572, |
|
"learning_rate": 1.995097645450266e-05, |
|
"loss": 17.0146, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 6.394615061001262, |
|
"eval_dim_128_cosine_accuracy": 0.884879318504496, |
|
"eval_dim_128_dot_accuracy": 0.1171320397539044, |
|
"eval_dim_128_euclidean_accuracy": 0.8861807856128727, |
|
"eval_dim_128_manhattan_accuracy": 0.8853525792711784, |
|
"eval_dim_128_max_accuracy": 0.8861807856128727, |
|
"eval_dim_256_cosine_accuracy": 0.8853525792711784, |
|
"eval_dim_256_dot_accuracy": 0.11464742072882159, |
|
"eval_dim_256_euclidean_accuracy": 0.885470894462849, |
|
"eval_dim_256_manhattan_accuracy": 0.8858258400378609, |
|
"eval_dim_256_max_accuracy": 0.8858258400378609, |
|
"eval_dim_384_cosine_accuracy": 0.8855892096545196, |
|
"eval_dim_384_dot_accuracy": 0.11441079034548036, |
|
"eval_dim_384_euclidean_accuracy": 0.8855892096545196, |
|
"eval_dim_384_manhattan_accuracy": 0.8864174159962139, |
|
"eval_dim_384_max_accuracy": 0.8864174159962139, |
|
"eval_dim_64_cosine_accuracy": 0.8844060577378136, |
|
"eval_dim_64_dot_accuracy": 0.11796024609559867, |
|
"eval_dim_64_euclidean_accuracy": 0.8852342640795078, |
|
"eval_dim_64_manhattan_accuracy": 0.8844060577378136, |
|
"eval_dim_64_max_accuracy": 0.8852342640795078, |
|
"eval_loss": 16.403297424316406, |
|
"eval_runtime": 102.2875, |
|
"eval_samples_per_second": 82.63, |
|
"eval_sequential_score": 0.8844060577378136, |
|
"eval_steps_per_second": 2.591, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 6.731173748422381, |
|
"grad_norm": 3.874021291732788, |
|
"learning_rate": 1.992488554155135e-05, |
|
"loss": 16.9393, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.731173748422381, |
|
"eval_dim_128_cosine_accuracy": 0.8828679602460956, |
|
"eval_dim_128_dot_accuracy": 0.11784193090392807, |
|
"eval_dim_128_euclidean_accuracy": 0.8846426881211548, |
|
"eval_dim_128_manhattan_accuracy": 0.8841694273544723, |
|
"eval_dim_128_max_accuracy": 0.8846426881211548, |
|
"eval_dim_256_cosine_accuracy": 0.8839327969711311, |
|
"eval_dim_256_dot_accuracy": 0.1171320397539044, |
|
"eval_dim_256_euclidean_accuracy": 0.8840511121628017, |
|
"eval_dim_256_manhattan_accuracy": 0.8852342640795078, |
|
"eval_dim_256_max_accuracy": 0.8852342640795078, |
|
"eval_dim_384_cosine_accuracy": 0.8847610033128254, |
|
"eval_dim_384_dot_accuracy": 0.11523899668717463, |
|
"eval_dim_384_euclidean_accuracy": 0.8847610033128254, |
|
"eval_dim_384_manhattan_accuracy": 0.8852342640795078, |
|
"eval_dim_384_max_accuracy": 0.8852342640795078, |
|
"eval_dim_64_cosine_accuracy": 0.8834595362044486, |
|
"eval_dim_64_dot_accuracy": 0.11831519167061051, |
|
"eval_dim_64_euclidean_accuracy": 0.8835778513961192, |
|
"eval_dim_64_manhattan_accuracy": 0.8820397539044014, |
|
"eval_dim_64_max_accuracy": 0.8835778513961192, |
|
"eval_loss": 16.40532684326172, |
|
"eval_runtime": 104.0121, |
|
"eval_samples_per_second": 81.26, |
|
"eval_sequential_score": 0.8834595362044486, |
|
"eval_steps_per_second": 2.548, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0677324358435, |
|
"grad_norm": 4.689154148101807, |
|
"learning_rate": 1.9893273896534936e-05, |
|
"loss": 16.899, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.0677324358435, |
|
"eval_dim_128_cosine_accuracy": 0.8826313298627544, |
|
"eval_dim_128_dot_accuracy": 0.11867013724562234, |
|
"eval_dim_128_euclidean_accuracy": 0.8823946994794132, |
|
"eval_dim_128_manhattan_accuracy": 0.882158069096072, |
|
"eval_dim_128_max_accuracy": 0.8826313298627544, |
|
"eval_dim_256_cosine_accuracy": 0.8828679602460956, |
|
"eval_dim_256_dot_accuracy": 0.11725035494557501, |
|
"eval_dim_256_euclidean_accuracy": 0.8831045906294368, |
|
"eval_dim_256_manhattan_accuracy": 0.8834595362044486, |
|
"eval_dim_256_max_accuracy": 0.8834595362044486, |
|
"eval_dim_384_cosine_accuracy": 0.883341221012778, |
|
"eval_dim_384_dot_accuracy": 0.11665877898722196, |
|
"eval_dim_384_euclidean_accuracy": 0.883341221012778, |
|
"eval_dim_384_manhattan_accuracy": 0.8839327969711311, |
|
"eval_dim_384_max_accuracy": 0.8839327969711311, |
|
"eval_dim_64_cosine_accuracy": 0.88180312352106, |
|
"eval_dim_64_dot_accuracy": 0.11890676762896356, |
|
"eval_dim_64_euclidean_accuracy": 0.8828679602460956, |
|
"eval_dim_64_manhattan_accuracy": 0.8820397539044014, |
|
"eval_dim_64_max_accuracy": 0.8828679602460956, |
|
"eval_loss": 16.416202545166016, |
|
"eval_runtime": 104.6249, |
|
"eval_samples_per_second": 80.784, |
|
"eval_sequential_score": 0.88180312352106, |
|
"eval_steps_per_second": 2.533, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.40429112326462, |
|
"grad_norm": 3.6406683921813965, |
|
"learning_rate": 1.9856159103477085e-05, |
|
"loss": 16.9112, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.40429112326462, |
|
"eval_dim_128_cosine_accuracy": 0.8828679602460956, |
|
"eval_dim_128_dot_accuracy": 0.11878845243729295, |
|
"eval_dim_128_euclidean_accuracy": 0.8828679602460956, |
|
"eval_dim_128_manhattan_accuracy": 0.8828679602460956, |
|
"eval_dim_128_max_accuracy": 0.8828679602460956, |
|
"eval_dim_256_cosine_accuracy": 0.8834595362044486, |
|
"eval_dim_256_dot_accuracy": 0.11618551822053952, |
|
"eval_dim_256_euclidean_accuracy": 0.8826313298627544, |
|
"eval_dim_256_manhattan_accuracy": 0.8840511121628017, |
|
"eval_dim_256_max_accuracy": 0.8840511121628017, |
|
"eval_dim_384_cosine_accuracy": 0.883341221012778, |
|
"eval_dim_384_dot_accuracy": 0.11665877898722196, |
|
"eval_dim_384_euclidean_accuracy": 0.883341221012778, |
|
"eval_dim_384_manhattan_accuracy": 0.884287742546143, |
|
"eval_dim_384_max_accuracy": 0.884287742546143, |
|
"eval_dim_64_cosine_accuracy": 0.8820397539044014, |
|
"eval_dim_64_dot_accuracy": 0.11914339801230478, |
|
"eval_dim_64_euclidean_accuracy": 0.8831045906294368, |
|
"eval_dim_64_manhattan_accuracy": 0.8826313298627544, |
|
"eval_dim_64_max_accuracy": 0.8831045906294368, |
|
"eval_loss": 16.405092239379883, |
|
"eval_runtime": 101.4605, |
|
"eval_samples_per_second": 83.303, |
|
"eval_sequential_score": 0.8820397539044014, |
|
"eval_steps_per_second": 2.612, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.740849810685738, |
|
"grad_norm": 4.141761302947998, |
|
"learning_rate": 1.9813561807535597e-05, |
|
"loss": 16.8508, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 7.740849810685738, |
|
"eval_dim_128_cosine_accuracy": 0.882158069096072, |
|
"eval_dim_128_dot_accuracy": 0.11878845243729295, |
|
"eval_dim_128_euclidean_accuracy": 0.8825130146710838, |
|
"eval_dim_128_manhattan_accuracy": 0.8838144817794605, |
|
"eval_dim_128_max_accuracy": 0.8838144817794605, |
|
"eval_dim_256_cosine_accuracy": 0.8825130146710838, |
|
"eval_dim_256_dot_accuracy": 0.11748698532891623, |
|
"eval_dim_256_euclidean_accuracy": 0.8831045906294368, |
|
"eval_dim_256_manhattan_accuracy": 0.8835778513961192, |
|
"eval_dim_256_max_accuracy": 0.8835778513961192, |
|
"eval_dim_384_cosine_accuracy": 0.8829862754377662, |
|
"eval_dim_384_dot_accuracy": 0.11701372456223379, |
|
"eval_dim_384_euclidean_accuracy": 0.8829862754377662, |
|
"eval_dim_384_manhattan_accuracy": 0.883341221012778, |
|
"eval_dim_384_max_accuracy": 0.883341221012778, |
|
"eval_dim_64_cosine_accuracy": 0.8820397539044014, |
|
"eval_dim_64_dot_accuracy": 0.12115475627070516, |
|
"eval_dim_64_euclidean_accuracy": 0.882158069096072, |
|
"eval_dim_64_manhattan_accuracy": 0.882749645054425, |
|
"eval_dim_64_max_accuracy": 0.882749645054425, |
|
"eval_loss": 16.40436363220215, |
|
"eval_runtime": 102.9818, |
|
"eval_samples_per_second": 82.073, |
|
"eval_sequential_score": 0.8820397539044014, |
|
"eval_steps_per_second": 2.573, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 8.077408498106857, |
|
"grad_norm": 3.7137351036071777, |
|
"learning_rate": 1.9765505703518494e-05, |
|
"loss": 16.8104, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.077408498106857, |
|
"eval_dim_128_cosine_accuracy": 0.8815664931377188, |
|
"eval_dim_128_dot_accuracy": 0.119380028395646, |
|
"eval_dim_128_euclidean_accuracy": 0.8813298627543776, |
|
"eval_dim_128_manhattan_accuracy": 0.8820397539044014, |
|
"eval_dim_128_max_accuracy": 0.8820397539044014, |
|
"eval_dim_256_cosine_accuracy": 0.8815664931377188, |
|
"eval_dim_256_dot_accuracy": 0.11796024609559867, |
|
"eval_dim_256_euclidean_accuracy": 0.8808566019876952, |
|
"eval_dim_256_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_256_max_accuracy": 0.8815664931377188, |
|
"eval_dim_384_cosine_accuracy": 0.8814481779460482, |
|
"eval_dim_384_dot_accuracy": 0.11855182205395173, |
|
"eval_dim_384_euclidean_accuracy": 0.8814481779460482, |
|
"eval_dim_384_manhattan_accuracy": 0.880619971604354, |
|
"eval_dim_384_max_accuracy": 0.8814481779460482, |
|
"eval_dim_64_cosine_accuracy": 0.8816848083293894, |
|
"eval_dim_64_dot_accuracy": 0.12174633222905822, |
|
"eval_dim_64_euclidean_accuracy": 0.880619971604354, |
|
"eval_dim_64_manhattan_accuracy": 0.8809749171793658, |
|
"eval_dim_64_max_accuracy": 0.8816848083293894, |
|
"eval_loss": 16.40627670288086, |
|
"eval_runtime": 104.9051, |
|
"eval_samples_per_second": 80.568, |
|
"eval_sequential_score": 0.8816848083293894, |
|
"eval_steps_per_second": 2.526, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.413967185527977, |
|
"grad_norm": 3.3535964488983154, |
|
"learning_rate": 1.9712017522703764e-05, |
|
"loss": 16.8212, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 8.413967185527977, |
|
"eval_dim_128_cosine_accuracy": 0.8834595362044486, |
|
"eval_dim_128_dot_accuracy": 0.11796024609559867, |
|
"eval_dim_128_euclidean_accuracy": 0.882749645054425, |
|
"eval_dim_128_manhattan_accuracy": 0.8825130146710838, |
|
"eval_dim_128_max_accuracy": 0.8834595362044486, |
|
"eval_dim_256_cosine_accuracy": 0.882158069096072, |
|
"eval_dim_256_dot_accuracy": 0.11748698532891623, |
|
"eval_dim_256_euclidean_accuracy": 0.8823946994794132, |
|
"eval_dim_256_manhattan_accuracy": 0.8819214387127308, |
|
"eval_dim_256_max_accuracy": 0.8823946994794132, |
|
"eval_dim_384_cosine_accuracy": 0.882158069096072, |
|
"eval_dim_384_dot_accuracy": 0.11784193090392807, |
|
"eval_dim_384_euclidean_accuracy": 0.882158069096072, |
|
"eval_dim_384_manhattan_accuracy": 0.882749645054425, |
|
"eval_dim_384_max_accuracy": 0.882749645054425, |
|
"eval_dim_64_cosine_accuracy": 0.8820397539044014, |
|
"eval_dim_64_dot_accuracy": 0.12091812588736393, |
|
"eval_dim_64_euclidean_accuracy": 0.8819214387127308, |
|
"eval_dim_64_manhattan_accuracy": 0.8815664931377188, |
|
"eval_dim_64_max_accuracy": 0.8820397539044014, |
|
"eval_loss": 16.40399169921875, |
|
"eval_runtime": 103.0829, |
|
"eval_samples_per_second": 81.992, |
|
"eval_sequential_score": 0.8820397539044014, |
|
"eval_steps_per_second": 2.571, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 8.750525872949096, |
|
"grad_norm": 4.203086853027344, |
|
"learning_rate": 1.9653127017970035e-05, |
|
"loss": 16.7743, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.750525872949096, |
|
"eval_dim_128_cosine_accuracy": 0.882158069096072, |
|
"eval_dim_128_dot_accuracy": 0.12020823473734027, |
|
"eval_dim_128_euclidean_accuracy": 0.8815664931377188, |
|
"eval_dim_128_manhattan_accuracy": 0.8814481779460482, |
|
"eval_dim_128_max_accuracy": 0.882158069096072, |
|
"eval_dim_256_cosine_accuracy": 0.8823946994794132, |
|
"eval_dim_256_dot_accuracy": 0.11878845243729295, |
|
"eval_dim_256_euclidean_accuracy": 0.8819214387127308, |
|
"eval_dim_256_manhattan_accuracy": 0.8816848083293894, |
|
"eval_dim_256_max_accuracy": 0.8823946994794132, |
|
"eval_dim_384_cosine_accuracy": 0.8816848083293894, |
|
"eval_dim_384_dot_accuracy": 0.11831519167061051, |
|
"eval_dim_384_euclidean_accuracy": 0.8816848083293894, |
|
"eval_dim_384_manhattan_accuracy": 0.882158069096072, |
|
"eval_dim_384_max_accuracy": 0.882158069096072, |
|
"eval_dim_64_cosine_accuracy": 0.8809749171793658, |
|
"eval_dim_64_dot_accuracy": 0.121509701845717, |
|
"eval_dim_64_euclidean_accuracy": 0.8807382867960246, |
|
"eval_dim_64_manhattan_accuracy": 0.881211547562707, |
|
"eval_dim_64_max_accuracy": 0.881211547562707, |
|
"eval_loss": 16.39342498779297, |
|
"eval_runtime": 102.6649, |
|
"eval_samples_per_second": 82.326, |
|
"eval_sequential_score": 0.8809749171793658, |
|
"eval_steps_per_second": 2.581, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 9.087084560370215, |
|
"grad_norm": 3.313908576965332, |
|
"learning_rate": 1.9588866947246498e-05, |
|
"loss": 16.7383, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 9.087084560370215, |
|
"eval_dim_128_cosine_accuracy": 0.8809749171793658, |
|
"eval_dim_128_dot_accuracy": 0.12068149550402271, |
|
"eval_dim_128_euclidean_accuracy": 0.8808566019876952, |
|
"eval_dim_128_manhattan_accuracy": 0.8814481779460482, |
|
"eval_dim_128_max_accuracy": 0.8814481779460482, |
|
"eval_dim_256_cosine_accuracy": 0.8820397539044014, |
|
"eval_dim_256_dot_accuracy": 0.11831519167061051, |
|
"eval_dim_256_euclidean_accuracy": 0.8810932323710364, |
|
"eval_dim_256_manhattan_accuracy": 0.881211547562707, |
|
"eval_dim_256_max_accuracy": 0.8820397539044014, |
|
"eval_dim_384_cosine_accuracy": 0.8807382867960246, |
|
"eval_dim_384_dot_accuracy": 0.11926171320397538, |
|
"eval_dim_384_euclidean_accuracy": 0.8807382867960246, |
|
"eval_dim_384_manhattan_accuracy": 0.8803833412210128, |
|
"eval_dim_384_max_accuracy": 0.8807382867960246, |
|
"eval_dim_64_cosine_accuracy": 0.880028395646001, |
|
"eval_dim_64_dot_accuracy": 0.12245622337908188, |
|
"eval_dim_64_euclidean_accuracy": 0.8807382867960246, |
|
"eval_dim_64_manhattan_accuracy": 0.8816848083293894, |
|
"eval_dim_64_max_accuracy": 0.8816848083293894, |
|
"eval_loss": 16.39626121520996, |
|
"eval_runtime": 105.1167, |
|
"eval_samples_per_second": 80.406, |
|
"eval_sequential_score": 0.880028395646001, |
|
"eval_steps_per_second": 2.521, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 9.423643247791334, |
|
"grad_norm": 6.617325305938721, |
|
"learning_rate": 1.9519273055291266e-05, |
|
"loss": 16.743, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.423643247791334, |
|
"eval_dim_128_cosine_accuracy": 0.8819214387127308, |
|
"eval_dim_128_dot_accuracy": 0.119380028395646, |
|
"eval_dim_128_euclidean_accuracy": 0.8826313298627544, |
|
"eval_dim_128_manhattan_accuracy": 0.8815664931377188, |
|
"eval_dim_128_max_accuracy": 0.8826313298627544, |
|
"eval_dim_256_cosine_accuracy": 0.882158069096072, |
|
"eval_dim_256_dot_accuracy": 0.11784193090392807, |
|
"eval_dim_256_euclidean_accuracy": 0.882158069096072, |
|
"eval_dim_256_manhattan_accuracy": 0.8816848083293894, |
|
"eval_dim_256_max_accuracy": 0.882158069096072, |
|
"eval_dim_384_cosine_accuracy": 0.8819214387127308, |
|
"eval_dim_384_dot_accuracy": 0.11807856128726929, |
|
"eval_dim_384_euclidean_accuracy": 0.8819214387127308, |
|
"eval_dim_384_manhattan_accuracy": 0.8826313298627544, |
|
"eval_dim_384_max_accuracy": 0.8826313298627544, |
|
"eval_dim_64_cosine_accuracy": 0.8797917652626597, |
|
"eval_dim_64_dot_accuracy": 0.12091812588736393, |
|
"eval_dim_64_euclidean_accuracy": 0.8810932323710364, |
|
"eval_dim_64_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_64_max_accuracy": 0.8810932323710364, |
|
"eval_loss": 16.406700134277344, |
|
"eval_runtime": 101.1577, |
|
"eval_samples_per_second": 83.553, |
|
"eval_sequential_score": 0.8797917652626597, |
|
"eval_steps_per_second": 2.62, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.760201935212454, |
|
"grad_norm": 4.450948715209961, |
|
"learning_rate": 1.944438405380829e-05, |
|
"loss": 16.7047, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 9.760201935212454, |
|
"eval_dim_128_cosine_accuracy": 0.8803833412210128, |
|
"eval_dim_128_dot_accuracy": 0.12056318031235211, |
|
"eval_dim_128_euclidean_accuracy": 0.8810932323710364, |
|
"eval_dim_128_manhattan_accuracy": 0.8802650260293422, |
|
"eval_dim_128_max_accuracy": 0.8810932323710364, |
|
"eval_dim_256_cosine_accuracy": 0.8809749171793658, |
|
"eval_dim_256_dot_accuracy": 0.11914339801230478, |
|
"eval_dim_256_euclidean_accuracy": 0.8813298627543776, |
|
"eval_dim_256_manhattan_accuracy": 0.881211547562707, |
|
"eval_dim_256_max_accuracy": 0.8813298627543776, |
|
"eval_dim_384_cosine_accuracy": 0.8809749171793658, |
|
"eval_dim_384_dot_accuracy": 0.11902508282063418, |
|
"eval_dim_384_euclidean_accuracy": 0.8809749171793658, |
|
"eval_dim_384_manhattan_accuracy": 0.8820397539044014, |
|
"eval_dim_384_max_accuracy": 0.8820397539044014, |
|
"eval_dim_64_cosine_accuracy": 0.8796734500709891, |
|
"eval_dim_64_dot_accuracy": 0.12245622337908188, |
|
"eval_dim_64_euclidean_accuracy": 0.880028395646001, |
|
"eval_dim_64_manhattan_accuracy": 0.8803833412210128, |
|
"eval_dim_64_max_accuracy": 0.8803833412210128, |
|
"eval_loss": 16.39591407775879, |
|
"eval_runtime": 102.018, |
|
"eval_samples_per_second": 82.848, |
|
"eval_sequential_score": 0.8796734500709891, |
|
"eval_steps_per_second": 2.598, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 10.096760622633571, |
|
"grad_norm": 6.13853120803833, |
|
"learning_rate": 1.9364241599913923e-05, |
|
"loss": 16.6782, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.096760622633571, |
|
"eval_dim_128_cosine_accuracy": 0.8788452437292948, |
|
"eval_dim_128_dot_accuracy": 0.1228111689540937, |
|
"eval_dim_128_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_128_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_128_max_accuracy": 0.8796734500709891, |
|
"eval_dim_256_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_256_dot_accuracy": 0.12091812588736393, |
|
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, |
|
"eval_dim_256_manhattan_accuracy": 0.8803833412210128, |
|
"eval_dim_256_max_accuracy": 0.8803833412210128, |
|
"eval_dim_384_cosine_accuracy": 0.8795551348793185, |
|
"eval_dim_384_dot_accuracy": 0.12044486512068149, |
|
"eval_dim_384_euclidean_accuracy": 0.8795551348793185, |
|
"eval_dim_384_manhattan_accuracy": 0.8799100804543304, |
|
"eval_dim_384_max_accuracy": 0.8799100804543304, |
|
"eval_dim_64_cosine_accuracy": 0.8783719829626124, |
|
"eval_dim_64_dot_accuracy": 0.12363937529578797, |
|
"eval_dim_64_euclidean_accuracy": 0.8795551348793185, |
|
"eval_dim_64_manhattan_accuracy": 0.8781353525792712, |
|
"eval_dim_64_max_accuracy": 0.8795551348793185, |
|
"eval_loss": 16.398588180541992, |
|
"eval_runtime": 103.6429, |
|
"eval_samples_per_second": 81.549, |
|
"eval_sequential_score": 0.8783719829626124, |
|
"eval_steps_per_second": 2.557, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.43331931005469, |
|
"grad_norm": 4.757913112640381, |
|
"learning_rate": 1.9278890272965097e-05, |
|
"loss": 16.6708, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 10.43331931005469, |
|
"eval_dim_128_cosine_accuracy": 0.8794368196876479, |
|
"eval_dim_128_dot_accuracy": 0.121509701845717, |
|
"eval_dim_128_euclidean_accuracy": 0.8795551348793185, |
|
"eval_dim_128_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_128_max_accuracy": 0.8795551348793185, |
|
"eval_dim_256_cosine_accuracy": 0.8792001893043067, |
|
"eval_dim_256_dot_accuracy": 0.11961665877898722, |
|
"eval_dim_256_euclidean_accuracy": 0.8794368196876479, |
|
"eval_dim_256_manhattan_accuracy": 0.8795551348793185, |
|
"eval_dim_256_max_accuracy": 0.8795551348793185, |
|
"eval_dim_384_cosine_accuracy": 0.8796734500709891, |
|
"eval_dim_384_dot_accuracy": 0.12032654992901089, |
|
"eval_dim_384_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_384_manhattan_accuracy": 0.8809749171793658, |
|
"eval_dim_384_max_accuracy": 0.8809749171793658, |
|
"eval_dim_64_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_64_dot_accuracy": 0.12245622337908188, |
|
"eval_dim_64_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_64_manhattan_accuracy": 0.8802650260293422, |
|
"eval_dim_64_max_accuracy": 0.8802650260293422, |
|
"eval_loss": 16.401565551757812, |
|
"eval_runtime": 103.0896, |
|
"eval_samples_per_second": 81.987, |
|
"eval_sequential_score": 0.879081874112636, |
|
"eval_steps_per_second": 2.571, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 10.76987799747581, |
|
"grad_norm": 5.452834129333496, |
|
"learning_rate": 1.9188377549761962e-05, |
|
"loss": 16.6485, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.76987799747581, |
|
"eval_dim_128_cosine_accuracy": 0.8789635589209654, |
|
"eval_dim_128_dot_accuracy": 0.1216280170373876, |
|
"eval_dim_128_euclidean_accuracy": 0.8789635589209654, |
|
"eval_dim_128_manhattan_accuracy": 0.8781353525792712, |
|
"eval_dim_128_max_accuracy": 0.8789635589209654, |
|
"eval_dim_256_cosine_accuracy": 0.8801467108376716, |
|
"eval_dim_256_dot_accuracy": 0.11985328916232844, |
|
"eval_dim_256_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_256_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_256_max_accuracy": 0.8801467108376716, |
|
"eval_dim_384_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_384_dot_accuracy": 0.12091812588736393, |
|
"eval_dim_384_euclidean_accuracy": 0.879081874112636, |
|
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_384_max_accuracy": 0.8794368196876479, |
|
"eval_dim_64_cosine_accuracy": 0.8781353525792712, |
|
"eval_dim_64_dot_accuracy": 0.12304779933743493, |
|
"eval_dim_64_euclidean_accuracy": 0.8783719829626124, |
|
"eval_dim_64_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_64_max_accuracy": 0.879081874112636, |
|
"eval_loss": 16.396345138549805, |
|
"eval_runtime": 103.471, |
|
"eval_samples_per_second": 81.685, |
|
"eval_sequential_score": 0.8781353525792712, |
|
"eval_steps_per_second": 2.561, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 11.106436684896929, |
|
"grad_norm": 3.5591487884521484, |
|
"learning_rate": 1.9092753778138885e-05, |
|
"loss": 16.6205, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 11.106436684896929, |
|
"eval_dim_128_cosine_accuracy": 0.8778987221959299, |
|
"eval_dim_128_dot_accuracy": 0.12316611452910553, |
|
"eval_dim_128_euclidean_accuracy": 0.8781353525792712, |
|
"eval_dim_128_manhattan_accuracy": 0.8780170373876006, |
|
"eval_dim_128_max_accuracy": 0.8781353525792712, |
|
"eval_dim_256_cosine_accuracy": 0.8787269285376242, |
|
"eval_dim_256_dot_accuracy": 0.121509701845717, |
|
"eval_dim_256_euclidean_accuracy": 0.8787269285376242, |
|
"eval_dim_256_manhattan_accuracy": 0.8793185044959773, |
|
"eval_dim_256_max_accuracy": 0.8793185044959773, |
|
"eval_dim_384_cosine_accuracy": 0.8793185044959773, |
|
"eval_dim_384_dot_accuracy": 0.12068149550402271, |
|
"eval_dim_384_euclidean_accuracy": 0.8793185044959773, |
|
"eval_dim_384_manhattan_accuracy": 0.8801467108376716, |
|
"eval_dim_384_max_accuracy": 0.8801467108376716, |
|
"eval_dim_64_cosine_accuracy": 0.8770705158542357, |
|
"eval_dim_64_dot_accuracy": 0.12541410317084714, |
|
"eval_dim_64_euclidean_accuracy": 0.8771888310459063, |
|
"eval_dim_64_manhattan_accuracy": 0.8776620918125887, |
|
"eval_dim_64_max_accuracy": 0.8776620918125887, |
|
"eval_loss": 16.401174545288086, |
|
"eval_runtime": 102.9169, |
|
"eval_samples_per_second": 82.124, |
|
"eval_sequential_score": 0.8770705158542357, |
|
"eval_steps_per_second": 2.575, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 11.442995372318048, |
|
"grad_norm": 3.712305784225464, |
|
"learning_rate": 1.8992072148958368e-05, |
|
"loss": 16.6095, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 11.442995372318048, |
|
"eval_dim_128_cosine_accuracy": 0.8786086133459536, |
|
"eval_dim_128_dot_accuracy": 0.12233790818741126, |
|
"eval_dim_128_euclidean_accuracy": 0.878490298154283, |
|
"eval_dim_128_manhattan_accuracy": 0.8786086133459536, |
|
"eval_dim_128_max_accuracy": 0.8786086133459536, |
|
"eval_dim_256_cosine_accuracy": 0.8789635589209654, |
|
"eval_dim_256_dot_accuracy": 0.1216280170373876, |
|
"eval_dim_256_euclidean_accuracy": 0.879081874112636, |
|
"eval_dim_256_manhattan_accuracy": 0.8780170373876006, |
|
"eval_dim_256_max_accuracy": 0.879081874112636, |
|
"eval_dim_384_cosine_accuracy": 0.8794368196876479, |
|
"eval_dim_384_dot_accuracy": 0.12056318031235211, |
|
"eval_dim_384_euclidean_accuracy": 0.8794368196876479, |
|
"eval_dim_384_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_384_max_accuracy": 0.8794368196876479, |
|
"eval_dim_64_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_64_dot_accuracy": 0.12541410317084714, |
|
"eval_dim_64_euclidean_accuracy": 0.8777804070042593, |
|
"eval_dim_64_manhattan_accuracy": 0.8788452437292948, |
|
"eval_dim_64_max_accuracy": 0.879081874112636, |
|
"eval_loss": 16.413122177124023, |
|
"eval_runtime": 103.5898, |
|
"eval_samples_per_second": 81.591, |
|
"eval_sequential_score": 0.879081874112636, |
|
"eval_steps_per_second": 2.558, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 11.779554059739167, |
|
"grad_norm": 4.9205145835876465, |
|
"learning_rate": 1.888638866652356e-05, |
|
"loss": 16.5891, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 11.779554059739167, |
|
"eval_dim_128_cosine_accuracy": 0.8807382867960246, |
|
"eval_dim_128_dot_accuracy": 0.1194983435873166, |
|
"eval_dim_128_euclidean_accuracy": 0.8805016564126834, |
|
"eval_dim_128_manhattan_accuracy": 0.8792001893043067, |
|
"eval_dim_128_max_accuracy": 0.8807382867960246, |
|
"eval_dim_256_cosine_accuracy": 0.8805016564126834, |
|
"eval_dim_256_dot_accuracy": 0.11902508282063418, |
|
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, |
|
"eval_dim_256_manhattan_accuracy": 0.8795551348793185, |
|
"eval_dim_256_max_accuracy": 0.8805016564126834, |
|
"eval_dim_384_cosine_accuracy": 0.8809749171793658, |
|
"eval_dim_384_dot_accuracy": 0.11902508282063418, |
|
"eval_dim_384_euclidean_accuracy": 0.8809749171793658, |
|
"eval_dim_384_manhattan_accuracy": 0.880028395646001, |
|
"eval_dim_384_max_accuracy": 0.8809749171793658, |
|
"eval_dim_64_cosine_accuracy": 0.8801467108376716, |
|
"eval_dim_64_dot_accuracy": 0.12292948414576432, |
|
"eval_dim_64_euclidean_accuracy": 0.879081874112636, |
|
"eval_dim_64_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_64_max_accuracy": 0.8801467108376716, |
|
"eval_loss": 16.40700340270996, |
|
"eval_runtime": 103.5887, |
|
"eval_samples_per_second": 81.592, |
|
"eval_sequential_score": 0.8801467108376716, |
|
"eval_steps_per_second": 2.558, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 12.116112747160287, |
|
"grad_norm": 4.849546909332275, |
|
"learning_rate": 1.8775762117425777e-05, |
|
"loss": 16.5619, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 12.116112747160287, |
|
"eval_dim_128_cosine_accuracy": 0.8794368196876479, |
|
"eval_dim_128_dot_accuracy": 0.121509701845717, |
|
"eval_dim_128_euclidean_accuracy": 0.8792001893043067, |
|
"eval_dim_128_manhattan_accuracy": 0.8789635589209654, |
|
"eval_dim_128_max_accuracy": 0.8794368196876479, |
|
"eval_dim_256_cosine_accuracy": 0.880028395646001, |
|
"eval_dim_256_dot_accuracy": 0.11973497397065783, |
|
"eval_dim_256_euclidean_accuracy": 0.8799100804543304, |
|
"eval_dim_256_manhattan_accuracy": 0.8792001893043067, |
|
"eval_dim_256_max_accuracy": 0.880028395646001, |
|
"eval_dim_384_cosine_accuracy": 0.8796734500709891, |
|
"eval_dim_384_dot_accuracy": 0.12032654992901089, |
|
"eval_dim_384_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_384_manhattan_accuracy": 0.8796734500709891, |
|
"eval_dim_384_max_accuracy": 0.8796734500709891, |
|
"eval_dim_64_cosine_accuracy": 0.8780170373876006, |
|
"eval_dim_64_dot_accuracy": 0.12470421202082348, |
|
"eval_dim_64_euclidean_accuracy": 0.8797917652626597, |
|
"eval_dim_64_manhattan_accuracy": 0.8786086133459536, |
|
"eval_dim_64_max_accuracy": 0.8797917652626597, |
|
"eval_loss": 16.396265029907227, |
|
"eval_runtime": 102.3506, |
|
"eval_samples_per_second": 82.579, |
|
"eval_sequential_score": 0.8780170373876006, |
|
"eval_steps_per_second": 2.589, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 12.452671434581404, |
|
"grad_norm": 4.944924831390381, |
|
"learning_rate": 1.866025403784439e-05, |
|
"loss": 16.5467, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 12.452671434581404, |
|
"eval_dim_128_cosine_accuracy": 0.8795551348793185, |
|
"eval_dim_128_dot_accuracy": 0.12316611452910553, |
|
"eval_dim_128_euclidean_accuracy": 0.8787269285376242, |
|
"eval_dim_128_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_128_max_accuracy": 0.8795551348793185, |
|
"eval_dim_256_cosine_accuracy": 0.880619971604354, |
|
"eval_dim_256_dot_accuracy": 0.12068149550402271, |
|
"eval_dim_256_euclidean_accuracy": 0.8794368196876479, |
|
"eval_dim_256_manhattan_accuracy": 0.8801467108376716, |
|
"eval_dim_256_max_accuracy": 0.880619971604354, |
|
"eval_dim_384_cosine_accuracy": 0.8803833412210128, |
|
"eval_dim_384_dot_accuracy": 0.11961665877898722, |
|
"eval_dim_384_euclidean_accuracy": 0.8803833412210128, |
|
"eval_dim_384_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_384_max_accuracy": 0.8807382867960246, |
|
"eval_dim_64_cosine_accuracy": 0.8789635589209654, |
|
"eval_dim_64_dot_accuracy": 0.12470421202082348, |
|
"eval_dim_64_euclidean_accuracy": 0.8781353525792712, |
|
"eval_dim_64_manhattan_accuracy": 0.8796734500709891, |
|
"eval_dim_64_max_accuracy": 0.8796734500709891, |
|
"eval_loss": 16.399133682250977, |
|
"eval_runtime": 104.1432, |
|
"eval_samples_per_second": 81.157, |
|
"eval_sequential_score": 0.8789635589209654, |
|
"eval_steps_per_second": 2.545, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 12.789230122002524, |
|
"grad_norm": 6.032313346862793, |
|
"learning_rate": 1.853992867931721e-05, |
|
"loss": 16.5398, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 12.789230122002524, |
|
"eval_dim_128_cosine_accuracy": 0.8792001893043067, |
|
"eval_dim_128_dot_accuracy": 0.12139138665404638, |
|
"eval_dim_128_euclidean_accuracy": 0.8797917652626597, |
|
"eval_dim_128_manhattan_accuracy": 0.8787269285376242, |
|
"eval_dim_128_max_accuracy": 0.8797917652626597, |
|
"eval_dim_256_cosine_accuracy": 0.8797917652626597, |
|
"eval_dim_256_dot_accuracy": 0.11973497397065783, |
|
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, |
|
"eval_dim_256_manhattan_accuracy": 0.8792001893043067, |
|
"eval_dim_256_max_accuracy": 0.8797917652626597, |
|
"eval_dim_384_cosine_accuracy": 0.8801467108376716, |
|
"eval_dim_384_dot_accuracy": 0.11985328916232844, |
|
"eval_dim_384_euclidean_accuracy": 0.8801467108376716, |
|
"eval_dim_384_manhattan_accuracy": 0.8805016564126834, |
|
"eval_dim_384_max_accuracy": 0.8805016564126834, |
|
"eval_dim_64_cosine_accuracy": 0.8788452437292948, |
|
"eval_dim_64_dot_accuracy": 0.12423095125414103, |
|
"eval_dim_64_euclidean_accuracy": 0.8793185044959773, |
|
"eval_dim_64_manhattan_accuracy": 0.8776620918125887, |
|
"eval_dim_64_max_accuracy": 0.8793185044959773, |
|
"eval_loss": 16.397045135498047, |
|
"eval_runtime": 103.5361, |
|
"eval_samples_per_second": 81.633, |
|
"eval_sequential_score": 0.8788452437292948, |
|
"eval_steps_per_second": 2.559, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 13.125788809423643, |
|
"grad_norm": 4.27797269821167, |
|
"learning_rate": 1.8414852973000503e-05, |
|
"loss": 16.5047, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 13.125788809423643, |
|
"eval_dim_128_cosine_accuracy": 0.8795551348793185, |
|
"eval_dim_128_dot_accuracy": 0.1216280170373876, |
|
"eval_dim_128_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_128_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_128_max_accuracy": 0.8797917652626597, |
|
"eval_dim_256_cosine_accuracy": 0.8803833412210128, |
|
"eval_dim_256_dot_accuracy": 0.12068149550402271, |
|
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, |
|
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_256_max_accuracy": 0.8803833412210128, |
|
"eval_dim_384_cosine_accuracy": 0.8803833412210128, |
|
"eval_dim_384_dot_accuracy": 0.11961665877898722, |
|
"eval_dim_384_euclidean_accuracy": 0.8803833412210128, |
|
"eval_dim_384_manhattan_accuracy": 0.8805016564126834, |
|
"eval_dim_384_max_accuracy": 0.8805016564126834, |
|
"eval_dim_64_cosine_accuracy": 0.8788452437292948, |
|
"eval_dim_64_dot_accuracy": 0.12588736393752958, |
|
"eval_dim_64_euclidean_accuracy": 0.8793185044959773, |
|
"eval_dim_64_manhattan_accuracy": 0.8802650260293422, |
|
"eval_dim_64_max_accuracy": 0.8802650260293422, |
|
"eval_loss": 16.396381378173828, |
|
"eval_runtime": 102.672, |
|
"eval_samples_per_second": 82.32, |
|
"eval_sequential_score": 0.8788452437292948, |
|
"eval_steps_per_second": 2.581, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 13.462347496844762, |
|
"grad_norm": 4.051229953765869, |
|
"learning_rate": 1.8285096492438424e-05, |
|
"loss": 16.4985, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.462347496844762, |
|
"eval_dim_128_cosine_accuracy": 0.8793185044959773, |
|
"eval_dim_128_dot_accuracy": 0.12127307146237577, |
|
"eval_dim_128_euclidean_accuracy": 0.8803833412210128, |
|
"eval_dim_128_manhattan_accuracy": 0.8796734500709891, |
|
"eval_dim_128_max_accuracy": 0.8803833412210128, |
|
"eval_dim_256_cosine_accuracy": 0.8797917652626597, |
|
"eval_dim_256_dot_accuracy": 0.12020823473734027, |
|
"eval_dim_256_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_256_max_accuracy": 0.8797917652626597, |
|
"eval_dim_384_cosine_accuracy": 0.8807382867960246, |
|
"eval_dim_384_dot_accuracy": 0.11926171320397538, |
|
"eval_dim_384_euclidean_accuracy": 0.8807382867960246, |
|
"eval_dim_384_manhattan_accuracy": 0.8810932323710364, |
|
"eval_dim_384_max_accuracy": 0.8810932323710364, |
|
"eval_dim_64_cosine_accuracy": 0.8789635589209654, |
|
"eval_dim_64_dot_accuracy": 0.12316611452910553, |
|
"eval_dim_64_euclidean_accuracy": 0.8787269285376242, |
|
"eval_dim_64_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_64_max_accuracy": 0.879081874112636, |
|
"eval_loss": 16.4024600982666, |
|
"eval_runtime": 104.2185, |
|
"eval_samples_per_second": 81.099, |
|
"eval_sequential_score": 0.8789635589209654, |
|
"eval_steps_per_second": 2.543, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.798906184265881, |
|
"grad_norm": 4.3837666511535645, |
|
"learning_rate": 1.8150731414862623e-05, |
|
"loss": 16.4852, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 13.798906184265881, |
|
"eval_dim_128_cosine_accuracy": 0.8801467108376716, |
|
"eval_dim_128_dot_accuracy": 0.12032654992901089, |
|
"eval_dim_128_euclidean_accuracy": 0.8805016564126834, |
|
"eval_dim_128_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_128_max_accuracy": 0.8805016564126834, |
|
"eval_dim_256_cosine_accuracy": 0.8809749171793658, |
|
"eval_dim_256_dot_accuracy": 0.119380028395646, |
|
"eval_dim_256_euclidean_accuracy": 0.8814481779460482, |
|
"eval_dim_256_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_256_max_accuracy": 0.8814481779460482, |
|
"eval_dim_384_cosine_accuracy": 0.880028395646001, |
|
"eval_dim_384_dot_accuracy": 0.11997160435399905, |
|
"eval_dim_384_euclidean_accuracy": 0.880028395646001, |
|
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_384_max_accuracy": 0.880028395646001, |
|
"eval_dim_64_cosine_accuracy": 0.8793185044959773, |
|
"eval_dim_64_dot_accuracy": 0.12352106010411737, |
|
"eval_dim_64_euclidean_accuracy": 0.8801467108376716, |
|
"eval_dim_64_manhattan_accuracy": 0.8796734500709891, |
|
"eval_dim_64_max_accuracy": 0.8801467108376716, |
|
"eval_loss": 16.410737991333008, |
|
"eval_runtime": 102.5333, |
|
"eval_samples_per_second": 82.432, |
|
"eval_sequential_score": 0.8793185044959773, |
|
"eval_steps_per_second": 2.585, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 14.135464871687, |
|
"grad_norm": 4.87747859954834, |
|
"learning_rate": 1.8011832481043577e-05, |
|
"loss": 16.4526, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 14.135464871687, |
|
"eval_dim_128_cosine_accuracy": 0.8796734500709891, |
|
"eval_dim_128_dot_accuracy": 0.12103644107903455, |
|
"eval_dim_128_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_128_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_128_max_accuracy": 0.8796734500709891, |
|
"eval_dim_256_cosine_accuracy": 0.8801467108376716, |
|
"eval_dim_256_dot_accuracy": 0.12068149550402271, |
|
"eval_dim_256_euclidean_accuracy": 0.8805016564126834, |
|
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_256_max_accuracy": 0.8805016564126834, |
|
"eval_dim_384_cosine_accuracy": 0.8808566019876952, |
|
"eval_dim_384_dot_accuracy": 0.11914339801230478, |
|
"eval_dim_384_euclidean_accuracy": 0.8808566019876952, |
|
"eval_dim_384_manhattan_accuracy": 0.8810932323710364, |
|
"eval_dim_384_max_accuracy": 0.8810932323710364, |
|
"eval_dim_64_cosine_accuracy": 0.8778987221959299, |
|
"eval_dim_64_dot_accuracy": 0.12470421202082348, |
|
"eval_dim_64_euclidean_accuracy": 0.8781353525792712, |
|
"eval_dim_64_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_64_max_accuracy": 0.879081874112636, |
|
"eval_loss": 16.392879486083984, |
|
"eval_runtime": 103.5589, |
|
"eval_samples_per_second": 81.615, |
|
"eval_sequential_score": 0.8778987221959299, |
|
"eval_steps_per_second": 2.559, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 14.47202355910812, |
|
"grad_norm": 6.463150978088379, |
|
"learning_rate": 1.78684769537159e-05, |
|
"loss": 16.4343, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 14.47202355910812, |
|
"eval_dim_128_cosine_accuracy": 0.8788452437292948, |
|
"eval_dim_128_dot_accuracy": 0.12221959299574066, |
|
"eval_dim_128_euclidean_accuracy": 0.878490298154283, |
|
"eval_dim_128_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_128_max_accuracy": 0.8797917652626597, |
|
"eval_dim_256_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_256_dot_accuracy": 0.121509701845717, |
|
"eval_dim_256_euclidean_accuracy": 0.8786086133459536, |
|
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_256_max_accuracy": 0.8797917652626597, |
|
"eval_dim_384_cosine_accuracy": 0.8796734500709891, |
|
"eval_dim_384_dot_accuracy": 0.12032654992901089, |
|
"eval_dim_384_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_384_manhattan_accuracy": 0.880619971604354, |
|
"eval_dim_384_max_accuracy": 0.880619971604354, |
|
"eval_dim_64_cosine_accuracy": 0.8774254614292475, |
|
"eval_dim_64_dot_accuracy": 0.1250591575958353, |
|
"eval_dim_64_euclidean_accuracy": 0.8774254614292475, |
|
"eval_dim_64_manhattan_accuracy": 0.8780170373876006, |
|
"eval_dim_64_max_accuracy": 0.8780170373876006, |
|
"eval_loss": 16.40749740600586, |
|
"eval_runtime": 102.9532, |
|
"eval_samples_per_second": 82.096, |
|
"eval_sequential_score": 0.8774254614292475, |
|
"eval_steps_per_second": 2.574, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 14.80858224652924, |
|
"grad_norm": 4.839356422424316, |
|
"learning_rate": 1.7720744574600865e-05, |
|
"loss": 16.4244, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 14.80858224652924, |
|
"eval_dim_128_cosine_accuracy": 0.8803833412210128, |
|
"eval_dim_128_dot_accuracy": 0.11973497397065783, |
|
"eval_dim_128_euclidean_accuracy": 0.880619971604354, |
|
"eval_dim_128_manhattan_accuracy": 0.8809749171793658, |
|
"eval_dim_128_max_accuracy": 0.8809749171793658, |
|
"eval_dim_256_cosine_accuracy": 0.8819214387127308, |
|
"eval_dim_256_dot_accuracy": 0.119380028395646, |
|
"eval_dim_256_euclidean_accuracy": 0.8815664931377188, |
|
"eval_dim_256_manhattan_accuracy": 0.8814481779460482, |
|
"eval_dim_256_max_accuracy": 0.8819214387127308, |
|
"eval_dim_384_cosine_accuracy": 0.8820397539044014, |
|
"eval_dim_384_dot_accuracy": 0.11796024609559867, |
|
"eval_dim_384_euclidean_accuracy": 0.8820397539044014, |
|
"eval_dim_384_manhattan_accuracy": 0.882158069096072, |
|
"eval_dim_384_max_accuracy": 0.882158069096072, |
|
"eval_dim_64_cosine_accuracy": 0.8808566019876952, |
|
"eval_dim_64_dot_accuracy": 0.12221959299574066, |
|
"eval_dim_64_euclidean_accuracy": 0.880619971604354, |
|
"eval_dim_64_manhattan_accuracy": 0.8786086133459536, |
|
"eval_dim_64_max_accuracy": 0.8808566019876952, |
|
"eval_loss": 16.402673721313477, |
|
"eval_runtime": 103.4179, |
|
"eval_samples_per_second": 81.727, |
|
"eval_sequential_score": 0.8808566019876952, |
|
"eval_steps_per_second": 2.562, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 15.145140933950358, |
|
"grad_norm": 5.812349796295166, |
|
"learning_rate": 1.756871752004992e-05, |
|
"loss": 16.3947, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 15.145140933950358, |
|
"eval_dim_128_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_128_dot_accuracy": 0.12316611452910553, |
|
"eval_dim_128_euclidean_accuracy": 0.8786086133459536, |
|
"eval_dim_128_manhattan_accuracy": 0.8809749171793658, |
|
"eval_dim_128_max_accuracy": 0.8809749171793658, |
|
"eval_dim_256_cosine_accuracy": 0.8792001893043067, |
|
"eval_dim_256_dot_accuracy": 0.12139138665404638, |
|
"eval_dim_256_euclidean_accuracy": 0.8801467108376716, |
|
"eval_dim_256_manhattan_accuracy": 0.8813298627543776, |
|
"eval_dim_256_max_accuracy": 0.8813298627543776, |
|
"eval_dim_384_cosine_accuracy": 0.8802650260293422, |
|
"eval_dim_384_dot_accuracy": 0.11973497397065783, |
|
"eval_dim_384_euclidean_accuracy": 0.8802650260293422, |
|
"eval_dim_384_manhattan_accuracy": 0.8808566019876952, |
|
"eval_dim_384_max_accuracy": 0.8808566019876952, |
|
"eval_dim_64_cosine_accuracy": 0.8773071462375769, |
|
"eval_dim_64_dot_accuracy": 0.12695220066256507, |
|
"eval_dim_64_euclidean_accuracy": 0.8768338854708945, |
|
"eval_dim_64_manhattan_accuracy": 0.8787269285376242, |
|
"eval_dim_64_max_accuracy": 0.8787269285376242, |
|
"eval_loss": 16.4101619720459, |
|
"eval_runtime": 105.1832, |
|
"eval_samples_per_second": 80.355, |
|
"eval_sequential_score": 0.8773071462375769, |
|
"eval_steps_per_second": 2.519, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 15.481699621371476, |
|
"grad_norm": 4.386394023895264, |
|
"learning_rate": 1.7412480355334006e-05, |
|
"loss": 16.3827, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 15.481699621371476, |
|
"eval_dim_128_cosine_accuracy": 0.8803833412210128, |
|
"eval_dim_128_dot_accuracy": 0.12245622337908188, |
|
"eval_dim_128_euclidean_accuracy": 0.880619971604354, |
|
"eval_dim_128_manhattan_accuracy": 0.88180312352106, |
|
"eval_dim_128_max_accuracy": 0.88180312352106, |
|
"eval_dim_256_cosine_accuracy": 0.8813298627543776, |
|
"eval_dim_256_dot_accuracy": 0.12079981069569333, |
|
"eval_dim_256_euclidean_accuracy": 0.8809749171793658, |
|
"eval_dim_256_manhattan_accuracy": 0.8819214387127308, |
|
"eval_dim_256_max_accuracy": 0.8819214387127308, |
|
"eval_dim_384_cosine_accuracy": 0.8813298627543776, |
|
"eval_dim_384_dot_accuracy": 0.11867013724562234, |
|
"eval_dim_384_euclidean_accuracy": 0.8813298627543776, |
|
"eval_dim_384_manhattan_accuracy": 0.8809749171793658, |
|
"eval_dim_384_max_accuracy": 0.8813298627543776, |
|
"eval_dim_64_cosine_accuracy": 0.8781353525792712, |
|
"eval_dim_64_dot_accuracy": 0.1260056791292002, |
|
"eval_dim_64_euclidean_accuracy": 0.8789635589209654, |
|
"eval_dim_64_manhattan_accuracy": 0.8814481779460482, |
|
"eval_dim_64_max_accuracy": 0.8814481779460482, |
|
"eval_loss": 16.404207229614258, |
|
"eval_runtime": 101.3893, |
|
"eval_samples_per_second": 83.362, |
|
"eval_sequential_score": 0.8781353525792712, |
|
"eval_steps_per_second": 2.614, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 15.818258308792595, |
|
"grad_norm": 4.8762359619140625, |
|
"learning_rate": 1.7252119987603976e-05, |
|
"loss": 16.3719, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 15.818258308792595, |
|
"eval_dim_128_cosine_accuracy": 0.8801467108376716, |
|
"eval_dim_128_dot_accuracy": 0.12032654992901089, |
|
"eval_dim_128_euclidean_accuracy": 0.8802650260293422, |
|
"eval_dim_128_manhattan_accuracy": 0.8802650260293422, |
|
"eval_dim_128_max_accuracy": 0.8802650260293422, |
|
"eval_dim_256_cosine_accuracy": 0.88180312352106, |
|
"eval_dim_256_dot_accuracy": 0.11878845243729295, |
|
"eval_dim_256_euclidean_accuracy": 0.8820397539044014, |
|
"eval_dim_256_manhattan_accuracy": 0.881211547562707, |
|
"eval_dim_256_max_accuracy": 0.8820397539044014, |
|
"eval_dim_384_cosine_accuracy": 0.8820397539044014, |
|
"eval_dim_384_dot_accuracy": 0.11796024609559867, |
|
"eval_dim_384_euclidean_accuracy": 0.8820397539044014, |
|
"eval_dim_384_manhattan_accuracy": 0.8808566019876952, |
|
"eval_dim_384_max_accuracy": 0.8820397539044014, |
|
"eval_dim_64_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_64_dot_accuracy": 0.12458589682915286, |
|
"eval_dim_64_euclidean_accuracy": 0.8805016564126834, |
|
"eval_dim_64_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_64_max_accuracy": 0.8805016564126834, |
|
"eval_loss": 16.40033721923828, |
|
"eval_runtime": 104.1264, |
|
"eval_samples_per_second": 81.171, |
|
"eval_sequential_score": 0.879081874112636, |
|
"eval_steps_per_second": 2.545, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 16.154816996213714, |
|
"grad_norm": 5.414395809173584, |
|
"learning_rate": 1.7087725617548385e-05, |
|
"loss": 16.3403, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 16.154816996213714, |
|
"eval_dim_128_cosine_accuracy": 0.8781353525792712, |
|
"eval_dim_128_dot_accuracy": 0.12328442972077615, |
|
"eval_dim_128_euclidean_accuracy": 0.8778987221959299, |
|
"eval_dim_128_manhattan_accuracy": 0.8787269285376242, |
|
"eval_dim_128_max_accuracy": 0.8787269285376242, |
|
"eval_dim_256_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_256_dot_accuracy": 0.12210127780407004, |
|
"eval_dim_256_euclidean_accuracy": 0.8788452437292948, |
|
"eval_dim_256_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_256_max_accuracy": 0.879081874112636, |
|
"eval_dim_384_cosine_accuracy": 0.8799100804543304, |
|
"eval_dim_384_dot_accuracy": 0.12008991954566967, |
|
"eval_dim_384_euclidean_accuracy": 0.8799100804543304, |
|
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_384_max_accuracy": 0.8799100804543304, |
|
"eval_dim_64_cosine_accuracy": 0.8767155702792239, |
|
"eval_dim_64_dot_accuracy": 0.12766209181258872, |
|
"eval_dim_64_euclidean_accuracy": 0.8765972550875533, |
|
"eval_dim_64_manhattan_accuracy": 0.8769522006625651, |
|
"eval_dim_64_max_accuracy": 0.8769522006625651, |
|
"eval_loss": 16.413236618041992, |
|
"eval_runtime": 105.1626, |
|
"eval_samples_per_second": 80.371, |
|
"eval_sequential_score": 0.8767155702792239, |
|
"eval_steps_per_second": 2.52, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 16.491375683634836, |
|
"grad_norm": 4.138753414154053, |
|
"learning_rate": 1.6919388689775463e-05, |
|
"loss": 16.3357, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 16.491375683634836, |
|
"eval_dim_128_cosine_accuracy": 0.8803833412210128, |
|
"eval_dim_128_dot_accuracy": 0.1216280170373876, |
|
"eval_dim_128_euclidean_accuracy": 0.879081874112636, |
|
"eval_dim_128_manhattan_accuracy": 0.8802650260293422, |
|
"eval_dim_128_max_accuracy": 0.8803833412210128, |
|
"eval_dim_256_cosine_accuracy": 0.8808566019876952, |
|
"eval_dim_256_dot_accuracy": 0.121509701845717, |
|
"eval_dim_256_euclidean_accuracy": 0.8802650260293422, |
|
"eval_dim_256_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_256_max_accuracy": 0.8808566019876952, |
|
"eval_dim_384_cosine_accuracy": 0.8807382867960246, |
|
"eval_dim_384_dot_accuracy": 0.11926171320397538, |
|
"eval_dim_384_euclidean_accuracy": 0.8807382867960246, |
|
"eval_dim_384_manhattan_accuracy": 0.8801467108376716, |
|
"eval_dim_384_max_accuracy": 0.8807382867960246, |
|
"eval_dim_64_cosine_accuracy": 0.8792001893043067, |
|
"eval_dim_64_dot_accuracy": 0.12647893989588263, |
|
"eval_dim_64_euclidean_accuracy": 0.8786086133459536, |
|
"eval_dim_64_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_64_max_accuracy": 0.8807382867960246, |
|
"eval_loss": 16.414878845214844, |
|
"eval_runtime": 100.6398, |
|
"eval_samples_per_second": 83.983, |
|
"eval_sequential_score": 0.8792001893043067, |
|
"eval_steps_per_second": 2.633, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 16.827934371055953, |
|
"grad_norm": 4.080146312713623, |
|
"learning_rate": 1.6747202841946928e-05, |
|
"loss": 16.3203, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 16.827934371055953, |
|
"eval_dim_128_cosine_accuracy": 0.8803833412210128, |
|
"eval_dim_128_dot_accuracy": 0.12186464742072882, |
|
"eval_dim_128_euclidean_accuracy": 0.8814481779460482, |
|
"eval_dim_128_manhattan_accuracy": 0.880619971604354, |
|
"eval_dim_128_max_accuracy": 0.8814481779460482, |
|
"eval_dim_256_cosine_accuracy": 0.8814481779460482, |
|
"eval_dim_256_dot_accuracy": 0.12044486512068149, |
|
"eval_dim_256_euclidean_accuracy": 0.8820397539044014, |
|
"eval_dim_256_manhattan_accuracy": 0.8825130146710838, |
|
"eval_dim_256_max_accuracy": 0.8825130146710838, |
|
"eval_dim_384_cosine_accuracy": 0.8815664931377188, |
|
"eval_dim_384_dot_accuracy": 0.11843350686228112, |
|
"eval_dim_384_euclidean_accuracy": 0.8815664931377188, |
|
"eval_dim_384_manhattan_accuracy": 0.8815664931377188, |
|
"eval_dim_384_max_accuracy": 0.8815664931377188, |
|
"eval_dim_64_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_64_dot_accuracy": 0.12565073355418835, |
|
"eval_dim_64_euclidean_accuracy": 0.880028395646001, |
|
"eval_dim_64_manhattan_accuracy": 0.8810932323710364, |
|
"eval_dim_64_max_accuracy": 0.8810932323710364, |
|
"eval_loss": 16.408126831054688, |
|
"eval_runtime": 103.4973, |
|
"eval_samples_per_second": 81.664, |
|
"eval_sequential_score": 0.879081874112636, |
|
"eval_steps_per_second": 2.56, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.16449305847707, |
|
"grad_norm": 5.26322078704834, |
|
"learning_rate": 1.6571263852691887e-05, |
|
"loss": 16.2986, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 17.16449305847707, |
|
"eval_dim_128_cosine_accuracy": 0.8797917652626597, |
|
"eval_dim_128_dot_accuracy": 0.12304779933743493, |
|
"eval_dim_128_euclidean_accuracy": 0.8801467108376716, |
|
"eval_dim_128_manhattan_accuracy": 0.8778987221959299, |
|
"eval_dim_128_max_accuracy": 0.8801467108376716, |
|
"eval_dim_256_cosine_accuracy": 0.880028395646001, |
|
"eval_dim_256_dot_accuracy": 0.12068149550402271, |
|
"eval_dim_256_euclidean_accuracy": 0.880619971604354, |
|
"eval_dim_256_manhattan_accuracy": 0.8805016564126834, |
|
"eval_dim_256_max_accuracy": 0.880619971604354, |
|
"eval_dim_384_cosine_accuracy": 0.8820397539044014, |
|
"eval_dim_384_dot_accuracy": 0.11796024609559867, |
|
"eval_dim_384_euclidean_accuracy": 0.8820397539044014, |
|
"eval_dim_384_manhattan_accuracy": 0.8808566019876952, |
|
"eval_dim_384_max_accuracy": 0.8820397539044014, |
|
"eval_dim_64_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_64_dot_accuracy": 0.12707051585423568, |
|
"eval_dim_64_euclidean_accuracy": 0.8781353525792712, |
|
"eval_dim_64_manhattan_accuracy": 0.8778987221959299, |
|
"eval_dim_64_max_accuracy": 0.879081874112636, |
|
"eval_loss": 16.413921356201172, |
|
"eval_runtime": 103.9357, |
|
"eval_samples_per_second": 81.32, |
|
"eval_sequential_score": 0.879081874112636, |
|
"eval_steps_per_second": 2.55, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 17.50105174589819, |
|
"grad_norm": 9.353097915649414, |
|
"learning_rate": 1.639166958832985e-05, |
|
"loss": 16.2923, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 17.50105174589819, |
|
"eval_dim_128_cosine_accuracy": 0.8786086133459536, |
|
"eval_dim_128_dot_accuracy": 0.12352106010411737, |
|
"eval_dim_128_euclidean_accuracy": 0.8783719829626124, |
|
"eval_dim_128_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_128_max_accuracy": 0.8807382867960246, |
|
"eval_dim_256_cosine_accuracy": 0.8792001893043067, |
|
"eval_dim_256_dot_accuracy": 0.12103644107903455, |
|
"eval_dim_256_euclidean_accuracy": 0.8796734500709891, |
|
"eval_dim_256_manhattan_accuracy": 0.8810932323710364, |
|
"eval_dim_256_max_accuracy": 0.8810932323710364, |
|
"eval_dim_384_cosine_accuracy": 0.8799100804543304, |
|
"eval_dim_384_dot_accuracy": 0.12008991954566967, |
|
"eval_dim_384_euclidean_accuracy": 0.8799100804543304, |
|
"eval_dim_384_manhattan_accuracy": 0.880028395646001, |
|
"eval_dim_384_max_accuracy": 0.880028395646001, |
|
"eval_dim_64_cosine_accuracy": 0.8768338854708945, |
|
"eval_dim_64_dot_accuracy": 0.12754377662091812, |
|
"eval_dim_64_euclidean_accuracy": 0.8762423095125415, |
|
"eval_dim_64_manhattan_accuracy": 0.8789635589209654, |
|
"eval_dim_64_max_accuracy": 0.8789635589209654, |
|
"eval_loss": 16.406217575073242, |
|
"eval_runtime": 101.8719, |
|
"eval_samples_per_second": 82.967, |
|
"eval_sequential_score": 0.8768338854708945, |
|
"eval_steps_per_second": 2.601, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 17.83761043331931, |
|
"grad_norm": 5.8258891105651855, |
|
"learning_rate": 1.6208519948432438e-05, |
|
"loss": 16.2649, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 17.83761043331931, |
|
"eval_dim_128_cosine_accuracy": 0.880028395646001, |
|
"eval_dim_128_dot_accuracy": 0.12186464742072882, |
|
"eval_dim_128_euclidean_accuracy": 0.8803833412210128, |
|
"eval_dim_128_manhattan_accuracy": 0.8799100804543304, |
|
"eval_dim_128_max_accuracy": 0.8803833412210128, |
|
"eval_dim_256_cosine_accuracy": 0.8807382867960246, |
|
"eval_dim_256_dot_accuracy": 0.12210127780407004, |
|
"eval_dim_256_euclidean_accuracy": 0.8814481779460482, |
|
"eval_dim_256_manhattan_accuracy": 0.8810932323710364, |
|
"eval_dim_256_max_accuracy": 0.8814481779460482, |
|
"eval_dim_384_cosine_accuracy": 0.8814481779460482, |
|
"eval_dim_384_dot_accuracy": 0.11855182205395173, |
|
"eval_dim_384_euclidean_accuracy": 0.8814481779460482, |
|
"eval_dim_384_manhattan_accuracy": 0.8814481779460482, |
|
"eval_dim_384_max_accuracy": 0.8814481779460482, |
|
"eval_dim_64_cosine_accuracy": 0.8787269285376242, |
|
"eval_dim_64_dot_accuracy": 0.1283719829626124, |
|
"eval_dim_64_euclidean_accuracy": 0.8788452437292948, |
|
"eval_dim_64_manhattan_accuracy": 0.8799100804543304, |
|
"eval_dim_64_max_accuracy": 0.8799100804543304, |
|
"eval_loss": 16.410572052001953, |
|
"eval_runtime": 101.9269, |
|
"eval_samples_per_second": 82.922, |
|
"eval_sequential_score": 0.8787269285376242, |
|
"eval_steps_per_second": 2.6, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 18.17416912074043, |
|
"grad_norm": 4.463468074798584, |
|
"learning_rate": 1.6021916810254096e-05, |
|
"loss": 16.2505, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 18.17416912074043, |
|
"eval_dim_128_cosine_accuracy": 0.8786086133459536, |
|
"eval_dim_128_dot_accuracy": 0.12411263606247042, |
|
"eval_dim_128_euclidean_accuracy": 0.8780170373876006, |
|
"eval_dim_128_manhattan_accuracy": 0.8792001893043067, |
|
"eval_dim_128_max_accuracy": 0.8792001893043067, |
|
"eval_dim_256_cosine_accuracy": 0.8793185044959773, |
|
"eval_dim_256_dot_accuracy": 0.12210127780407004, |
|
"eval_dim_256_euclidean_accuracy": 0.8793185044959773, |
|
"eval_dim_256_manhattan_accuracy": 0.8805016564126834, |
|
"eval_dim_256_max_accuracy": 0.8805016564126834, |
|
"eval_dim_384_cosine_accuracy": 0.8802650260293422, |
|
"eval_dim_384_dot_accuracy": 0.11973497397065783, |
|
"eval_dim_384_euclidean_accuracy": 0.8802650260293422, |
|
"eval_dim_384_manhattan_accuracy": 0.8813298627543776, |
|
"eval_dim_384_max_accuracy": 0.8813298627543776, |
|
"eval_dim_64_cosine_accuracy": 0.8770705158542357, |
|
"eval_dim_64_dot_accuracy": 0.13014671083767157, |
|
"eval_dim_64_euclidean_accuracy": 0.8761239943208708, |
|
"eval_dim_64_manhattan_accuracy": 0.8787269285376242, |
|
"eval_dim_64_max_accuracy": 0.8787269285376242, |
|
"eval_loss": 16.418752670288086, |
|
"eval_runtime": 106.398, |
|
"eval_samples_per_second": 79.438, |
|
"eval_sequential_score": 0.8770705158542357, |
|
"eval_steps_per_second": 2.491, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 18.510727808161548, |
|
"grad_norm": 5.066239833831787, |
|
"learning_rate": 1.5831963972062734e-05, |
|
"loss": 16.226, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 18.510727808161548, |
|
"eval_dim_128_cosine_accuracy": 0.8770705158542357, |
|
"eval_dim_128_dot_accuracy": 0.12446758163748226, |
|
"eval_dim_128_euclidean_accuracy": 0.8771888310459063, |
|
"eval_dim_128_manhattan_accuracy": 0.8778987221959299, |
|
"eval_dim_128_max_accuracy": 0.8778987221959299, |
|
"eval_dim_256_cosine_accuracy": 0.8781353525792712, |
|
"eval_dim_256_dot_accuracy": 0.12304779933743493, |
|
"eval_dim_256_euclidean_accuracy": 0.8788452437292948, |
|
"eval_dim_256_manhattan_accuracy": 0.8799100804543304, |
|
"eval_dim_256_max_accuracy": 0.8799100804543304, |
|
"eval_dim_384_cosine_accuracy": 0.8780170373876006, |
|
"eval_dim_384_dot_accuracy": 0.12198296261239944, |
|
"eval_dim_384_euclidean_accuracy": 0.8780170373876006, |
|
"eval_dim_384_manhattan_accuracy": 0.8770705158542357, |
|
"eval_dim_384_max_accuracy": 0.8780170373876006, |
|
"eval_dim_64_cosine_accuracy": 0.8765972550875533, |
|
"eval_dim_64_dot_accuracy": 0.12884524372929484, |
|
"eval_dim_64_euclidean_accuracy": 0.8765972550875533, |
|
"eval_dim_64_manhattan_accuracy": 0.8778987221959299, |
|
"eval_dim_64_max_accuracy": 0.8778987221959299, |
|
"eval_loss": 16.4149112701416, |
|
"eval_runtime": 101.2915, |
|
"eval_samples_per_second": 83.442, |
|
"eval_sequential_score": 0.8765972550875533, |
|
"eval_steps_per_second": 2.616, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 18.84728649558267, |
|
"grad_norm": 4.982476234436035, |
|
"learning_rate": 1.5638767095401778e-05, |
|
"loss": 16.2106, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 18.84728649558267, |
|
"eval_dim_128_cosine_accuracy": 0.8780170373876006, |
|
"eval_dim_128_dot_accuracy": 0.12529578797917654, |
|
"eval_dim_128_euclidean_accuracy": 0.878490298154283, |
|
"eval_dim_128_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_128_max_accuracy": 0.8794368196876479, |
|
"eval_dim_256_cosine_accuracy": 0.8799100804543304, |
|
"eval_dim_256_dot_accuracy": 0.1226928537624231, |
|
"eval_dim_256_euclidean_accuracy": 0.8797917652626597, |
|
"eval_dim_256_manhattan_accuracy": 0.8801467108376716, |
|
"eval_dim_256_max_accuracy": 0.8801467108376716, |
|
"eval_dim_384_cosine_accuracy": 0.879081874112636, |
|
"eval_dim_384_dot_accuracy": 0.12091812588736393, |
|
"eval_dim_384_euclidean_accuracy": 0.879081874112636, |
|
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_384_max_accuracy": 0.8794368196876479, |
|
"eval_dim_64_cosine_accuracy": 0.8767155702792239, |
|
"eval_dim_64_dot_accuracy": 0.13002839564600094, |
|
"eval_dim_64_euclidean_accuracy": 0.8768338854708945, |
|
"eval_dim_64_manhattan_accuracy": 0.8778987221959299, |
|
"eval_dim_64_max_accuracy": 0.8778987221959299, |
|
"eval_loss": 16.423009872436523, |
|
"eval_runtime": 103.6087, |
|
"eval_samples_per_second": 81.576, |
|
"eval_sequential_score": 0.8767155702792239, |
|
"eval_steps_per_second": 2.558, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 19.183845183003786, |
|
"grad_norm": 6.176373481750488, |
|
"learning_rate": 1.5442433646315792e-05, |
|
"loss": 16.2052, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 19.183845183003786, |
|
"eval_dim_128_cosine_accuracy": 0.8769522006625651, |
|
"eval_dim_128_dot_accuracy": 0.12576904874585898, |
|
"eval_dim_128_euclidean_accuracy": 0.8758873639375295, |
|
"eval_dim_128_manhattan_accuracy": 0.8793185044959773, |
|
"eval_dim_128_max_accuracy": 0.8793185044959773, |
|
"eval_dim_256_cosine_accuracy": 0.8776620918125887, |
|
"eval_dim_256_dot_accuracy": 0.12328442972077615, |
|
"eval_dim_256_euclidean_accuracy": 0.8778987221959299, |
|
"eval_dim_256_manhattan_accuracy": 0.8796734500709891, |
|
"eval_dim_256_max_accuracy": 0.8796734500709891, |
|
"eval_dim_384_cosine_accuracy": 0.878490298154283, |
|
"eval_dim_384_dot_accuracy": 0.121509701845717, |
|
"eval_dim_384_euclidean_accuracy": 0.878490298154283, |
|
"eval_dim_384_manhattan_accuracy": 0.8814481779460482, |
|
"eval_dim_384_max_accuracy": 0.8814481779460482, |
|
"eval_dim_64_cosine_accuracy": 0.8744675816374823, |
|
"eval_dim_64_dot_accuracy": 0.13369616658778988, |
|
"eval_dim_64_euclidean_accuracy": 0.8742309512541411, |
|
"eval_dim_64_manhattan_accuracy": 0.8781353525792712, |
|
"eval_dim_64_max_accuracy": 0.8781353525792712, |
|
"eval_loss": 16.435117721557617, |
|
"eval_runtime": 104.4101, |
|
"eval_samples_per_second": 80.95, |
|
"eval_sequential_score": 0.8744675816374823, |
|
"eval_steps_per_second": 2.538, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 19.520403870424904, |
|
"grad_norm": 7.323819160461426, |
|
"learning_rate": 1.5243072835572319e-05, |
|
"loss": 16.186, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.520403870424904, |
|
"eval_dim_128_cosine_accuracy": 0.8776620918125887, |
|
"eval_dim_128_dot_accuracy": 0.12363937529578797, |
|
"eval_dim_128_euclidean_accuracy": 0.8776620918125887, |
|
"eval_dim_128_manhattan_accuracy": 0.876360624704212, |
|
"eval_dim_128_max_accuracy": 0.8776620918125887, |
|
"eval_dim_256_cosine_accuracy": 0.8793185044959773, |
|
"eval_dim_256_dot_accuracy": 0.12198296261239944, |
|
"eval_dim_256_euclidean_accuracy": 0.8789635589209654, |
|
"eval_dim_256_manhattan_accuracy": 0.8777804070042593, |
|
"eval_dim_256_max_accuracy": 0.8793185044959773, |
|
"eval_dim_384_cosine_accuracy": 0.8792001893043067, |
|
"eval_dim_384_dot_accuracy": 0.12079981069569333, |
|
"eval_dim_384_euclidean_accuracy": 0.8792001893043067, |
|
"eval_dim_384_manhattan_accuracy": 0.8789635589209654, |
|
"eval_dim_384_max_accuracy": 0.8792001893043067, |
|
"eval_dim_64_cosine_accuracy": 0.8762423095125415, |
|
"eval_dim_64_dot_accuracy": 0.13097491717936582, |
|
"eval_dim_64_euclidean_accuracy": 0.8748225272124941, |
|
"eval_dim_64_manhattan_accuracy": 0.8782536677709418, |
|
"eval_dim_64_max_accuracy": 0.8782536677709418, |
|
"eval_loss": 16.433080673217773, |
|
"eval_runtime": 101.0285, |
|
"eval_samples_per_second": 83.66, |
|
"eval_sequential_score": 0.8762423095125415, |
|
"eval_steps_per_second": 2.623, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.856962557846025, |
|
"grad_norm": 6.637113571166992, |
|
"learning_rate": 1.5040795557913246e-05, |
|
"loss": 16.1496, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 19.856962557846025, |
|
"eval_dim_128_cosine_accuracy": 0.8774254614292475, |
|
"eval_dim_128_dot_accuracy": 0.12529578797917654, |
|
"eval_dim_128_euclidean_accuracy": 0.8770705158542357, |
|
"eval_dim_128_manhattan_accuracy": 0.8782536677709418, |
|
"eval_dim_128_max_accuracy": 0.8782536677709418, |
|
"eval_dim_256_cosine_accuracy": 0.8781353525792712, |
|
"eval_dim_256_dot_accuracy": 0.12375769048745859, |
|
"eval_dim_256_euclidean_accuracy": 0.8783719829626124, |
|
"eval_dim_256_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_256_max_accuracy": 0.879081874112636, |
|
"eval_dim_384_cosine_accuracy": 0.8780170373876006, |
|
"eval_dim_384_dot_accuracy": 0.12198296261239944, |
|
"eval_dim_384_euclidean_accuracy": 0.8780170373876006, |
|
"eval_dim_384_manhattan_accuracy": 0.8786086133459536, |
|
"eval_dim_384_max_accuracy": 0.8786086133459536, |
|
"eval_dim_64_cosine_accuracy": 0.8770705158542357, |
|
"eval_dim_64_dot_accuracy": 0.13357785139611927, |
|
"eval_dim_64_euclidean_accuracy": 0.8756507335541883, |
|
"eval_dim_64_manhattan_accuracy": 0.8775437766209181, |
|
"eval_dim_64_max_accuracy": 0.8775437766209181, |
|
"eval_loss": 16.437721252441406, |
|
"eval_runtime": 103.9645, |
|
"eval_samples_per_second": 81.297, |
|
"eval_sequential_score": 0.8770705158542357, |
|
"eval_steps_per_second": 2.549, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 20.193521245267142, |
|
"grad_norm": 4.9336957931518555, |
|
"learning_rate": 1.4835714330369445e-05, |
|
"loss": 16.151, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 20.193521245267142, |
|
"eval_dim_128_cosine_accuracy": 0.8765972550875533, |
|
"eval_dim_128_dot_accuracy": 0.1261239943208708, |
|
"eval_dim_128_euclidean_accuracy": 0.8761239943208708, |
|
"eval_dim_128_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_128_max_accuracy": 0.8797917652626597, |
|
"eval_dim_256_cosine_accuracy": 0.8780170373876006, |
|
"eval_dim_256_dot_accuracy": 0.12245622337908188, |
|
"eval_dim_256_euclidean_accuracy": 0.8771888310459063, |
|
"eval_dim_256_manhattan_accuracy": 0.8801467108376716, |
|
"eval_dim_256_max_accuracy": 0.8801467108376716, |
|
"eval_dim_384_cosine_accuracy": 0.8780170373876006, |
|
"eval_dim_384_dot_accuracy": 0.12198296261239944, |
|
"eval_dim_384_euclidean_accuracy": 0.8780170373876006, |
|
"eval_dim_384_manhattan_accuracy": 0.8819214387127308, |
|
"eval_dim_384_max_accuracy": 0.8819214387127308, |
|
"eval_dim_64_cosine_accuracy": 0.8750591575958353, |
|
"eval_dim_64_dot_accuracy": 0.1361807856128727, |
|
"eval_dim_64_euclidean_accuracy": 0.8730477993374349, |
|
"eval_dim_64_manhattan_accuracy": 0.878490298154283, |
|
"eval_dim_64_max_accuracy": 0.878490298154283, |
|
"eval_loss": 16.44074821472168, |
|
"eval_runtime": 101.9564, |
|
"eval_samples_per_second": 82.898, |
|
"eval_sequential_score": 0.8750591575958353, |
|
"eval_steps_per_second": 2.599, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 20.530079932688263, |
|
"grad_norm": 5.225156784057617, |
|
"learning_rate": 1.4627943229672992e-05, |
|
"loss": 16.1081, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 20.530079932688263, |
|
"eval_dim_128_cosine_accuracy": 0.8758873639375295, |
|
"eval_dim_128_dot_accuracy": 0.1261239943208708, |
|
"eval_dim_128_euclidean_accuracy": 0.8758873639375295, |
|
"eval_dim_128_manhattan_accuracy": 0.8781353525792712, |
|
"eval_dim_128_max_accuracy": 0.8781353525792712, |
|
"eval_dim_256_cosine_accuracy": 0.8775437766209181, |
|
"eval_dim_256_dot_accuracy": 0.12245622337908188, |
|
"eval_dim_256_euclidean_accuracy": 0.8778987221959299, |
|
"eval_dim_256_manhattan_accuracy": 0.8776620918125887, |
|
"eval_dim_256_max_accuracy": 0.8778987221959299, |
|
"eval_dim_384_cosine_accuracy": 0.8774254614292475, |
|
"eval_dim_384_dot_accuracy": 0.12257453857075248, |
|
"eval_dim_384_euclidean_accuracy": 0.8774254614292475, |
|
"eval_dim_384_manhattan_accuracy": 0.8788452437292948, |
|
"eval_dim_384_max_accuracy": 0.8788452437292948, |
|
"eval_dim_64_cosine_accuracy": 0.8749408424041647, |
|
"eval_dim_64_dot_accuracy": 0.13712730714623758, |
|
"eval_dim_64_euclidean_accuracy": 0.8743492664458117, |
|
"eval_dim_64_manhattan_accuracy": 0.8765972550875533, |
|
"eval_dim_64_max_accuracy": 0.8765972550875533, |
|
"eval_loss": 16.442630767822266, |
|
"eval_runtime": 104.3455, |
|
"eval_samples_per_second": 81.0, |
|
"eval_sequential_score": 0.8749408424041647, |
|
"eval_steps_per_second": 2.54, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 20.86663862010938, |
|
"grad_norm": 4.5568132400512695, |
|
"learning_rate": 1.4417597828801833e-05, |
|
"loss": 16.0864, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 20.86663862010938, |
|
"eval_dim_128_cosine_accuracy": 0.8774254614292475, |
|
"eval_dim_128_dot_accuracy": 0.12659725508755323, |
|
"eval_dim_128_euclidean_accuracy": 0.8765972550875533, |
|
"eval_dim_128_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_128_max_accuracy": 0.879081874112636, |
|
"eval_dim_256_cosine_accuracy": 0.8781353525792712, |
|
"eval_dim_256_dot_accuracy": 0.12292948414576432, |
|
"eval_dim_256_euclidean_accuracy": 0.8780170373876006, |
|
"eval_dim_256_manhattan_accuracy": 0.880619971604354, |
|
"eval_dim_256_max_accuracy": 0.880619971604354, |
|
"eval_dim_384_cosine_accuracy": 0.8787269285376242, |
|
"eval_dim_384_dot_accuracy": 0.12127307146237577, |
|
"eval_dim_384_euclidean_accuracy": 0.8787269285376242, |
|
"eval_dim_384_manhattan_accuracy": 0.8793185044959773, |
|
"eval_dim_384_max_accuracy": 0.8793185044959773, |
|
"eval_dim_64_cosine_accuracy": 0.8745858968291529, |
|
"eval_dim_64_dot_accuracy": 0.13724562233790819, |
|
"eval_dim_64_euclidean_accuracy": 0.8744675816374823, |
|
"eval_dim_64_manhattan_accuracy": 0.8780170373876006, |
|
"eval_dim_64_max_accuracy": 0.8780170373876006, |
|
"eval_loss": 16.441152572631836, |
|
"eval_runtime": 103.8678, |
|
"eval_samples_per_second": 81.373, |
|
"eval_sequential_score": 0.8745858968291529, |
|
"eval_steps_per_second": 2.551, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 21.203197307530502, |
|
"grad_norm": 6.664557933807373, |
|
"learning_rate": 1.4204795132692146e-05, |
|
"loss": 16.0934, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 21.203197307530502, |
|
"eval_dim_128_cosine_accuracy": 0.8768338854708945, |
|
"eval_dim_128_dot_accuracy": 0.12789872219592996, |
|
"eval_dim_128_euclidean_accuracy": 0.8758873639375295, |
|
"eval_dim_128_manhattan_accuracy": 0.8803833412210128, |
|
"eval_dim_128_max_accuracy": 0.8803833412210128, |
|
"eval_dim_256_cosine_accuracy": 0.8782536677709418, |
|
"eval_dim_256_dot_accuracy": 0.12411263606247042, |
|
"eval_dim_256_euclidean_accuracy": 0.8777804070042593, |
|
"eval_dim_256_manhattan_accuracy": 0.88180312352106, |
|
"eval_dim_256_max_accuracy": 0.88180312352106, |
|
"eval_dim_384_cosine_accuracy": 0.8794368196876479, |
|
"eval_dim_384_dot_accuracy": 0.12056318031235211, |
|
"eval_dim_384_euclidean_accuracy": 0.8794368196876479, |
|
"eval_dim_384_manhattan_accuracy": 0.881211547562707, |
|
"eval_dim_384_max_accuracy": 0.881211547562707, |
|
"eval_dim_64_cosine_accuracy": 0.8745858968291529, |
|
"eval_dim_64_dot_accuracy": 0.14008518693800284, |
|
"eval_dim_64_euclidean_accuracy": 0.8729294841457643, |
|
"eval_dim_64_manhattan_accuracy": 0.8795551348793185, |
|
"eval_dim_64_max_accuracy": 0.8795551348793185, |
|
"eval_loss": 16.4547176361084, |
|
"eval_runtime": 105.011, |
|
"eval_samples_per_second": 80.487, |
|
"eval_sequential_score": 0.8745858968291529, |
|
"eval_steps_per_second": 2.524, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 21.53975599495162, |
|
"grad_norm": 6.669680118560791, |
|
"learning_rate": 1.3989653513154165e-05, |
|
"loss": 16.0382, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 21.53975599495162, |
|
"eval_dim_128_cosine_accuracy": 0.8742309512541411, |
|
"eval_dim_128_dot_accuracy": 0.1283719829626124, |
|
"eval_dim_128_euclidean_accuracy": 0.8738760056791292, |
|
"eval_dim_128_manhattan_accuracy": 0.8748225272124941, |
|
"eval_dim_128_max_accuracy": 0.8748225272124941, |
|
"eval_dim_256_cosine_accuracy": 0.8751774727875059, |
|
"eval_dim_256_dot_accuracy": 0.12446758163748226, |
|
"eval_dim_256_euclidean_accuracy": 0.8754141031708471, |
|
"eval_dim_256_manhattan_accuracy": 0.8765972550875533, |
|
"eval_dim_256_max_accuracy": 0.8765972550875533, |
|
"eval_dim_384_cosine_accuracy": 0.8765972550875533, |
|
"eval_dim_384_dot_accuracy": 0.12340274491244675, |
|
"eval_dim_384_euclidean_accuracy": 0.8765972550875533, |
|
"eval_dim_384_manhattan_accuracy": 0.8761239943208708, |
|
"eval_dim_384_max_accuracy": 0.8765972550875533, |
|
"eval_dim_64_cosine_accuracy": 0.8723379081874113, |
|
"eval_dim_64_dot_accuracy": 0.14020350212967345, |
|
"eval_dim_64_euclidean_accuracy": 0.8703265499290109, |
|
"eval_dim_64_manhattan_accuracy": 0.8754141031708471, |
|
"eval_dim_64_max_accuracy": 0.8754141031708471, |
|
"eval_loss": 16.458948135375977, |
|
"eval_runtime": 101.007, |
|
"eval_samples_per_second": 83.677, |
|
"eval_sequential_score": 0.8723379081874113, |
|
"eval_steps_per_second": 2.624, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 21.87631468237274, |
|
"grad_norm": 5.666304588317871, |
|
"learning_rate": 1.37722926430277e-05, |
|
"loss": 16.0279, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 21.87631468237274, |
|
"eval_dim_128_cosine_accuracy": 0.8751774727875059, |
|
"eval_dim_128_dot_accuracy": 0.12979176526265973, |
|
"eval_dim_128_euclidean_accuracy": 0.8743492664458117, |
|
"eval_dim_128_manhattan_accuracy": 0.8771888310459063, |
|
"eval_dim_128_max_accuracy": 0.8771888310459063, |
|
"eval_dim_256_cosine_accuracy": 0.8765972550875533, |
|
"eval_dim_256_dot_accuracy": 0.1260056791292002, |
|
"eval_dim_256_euclidean_accuracy": 0.8761239943208708, |
|
"eval_dim_256_manhattan_accuracy": 0.8796734500709891, |
|
"eval_dim_256_max_accuracy": 0.8796734500709891, |
|
"eval_dim_384_cosine_accuracy": 0.8773071462375769, |
|
"eval_dim_384_dot_accuracy": 0.1226928537624231, |
|
"eval_dim_384_euclidean_accuracy": 0.8773071462375769, |
|
"eval_dim_384_manhattan_accuracy": 0.8777804070042593, |
|
"eval_dim_384_max_accuracy": 0.8777804070042593, |
|
"eval_dim_64_cosine_accuracy": 0.8728111689540937, |
|
"eval_dim_64_dot_accuracy": 0.14221486038807382, |
|
"eval_dim_64_euclidean_accuracy": 0.8732844297207761, |
|
"eval_dim_64_manhattan_accuracy": 0.8776620918125887, |
|
"eval_dim_64_max_accuracy": 0.8776620918125887, |
|
"eval_loss": 16.46676254272461, |
|
"eval_runtime": 102.9103, |
|
"eval_samples_per_second": 82.13, |
|
"eval_sequential_score": 0.8728111689540937, |
|
"eval_steps_per_second": 2.575, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 22.212873369793858, |
|
"grad_norm": 6.600480556488037, |
|
"learning_rate": 1.3552833429613939e-05, |
|
"loss": 16.0327, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 22.212873369793858, |
|
"eval_dim_128_cosine_accuracy": 0.8742309512541411, |
|
"eval_dim_128_dot_accuracy": 0.13002839564600094, |
|
"eval_dim_128_euclidean_accuracy": 0.8742309512541411, |
|
"eval_dim_128_manhattan_accuracy": 0.880028395646001, |
|
"eval_dim_128_max_accuracy": 0.880028395646001, |
|
"eval_dim_256_cosine_accuracy": 0.8768338854708945, |
|
"eval_dim_256_dot_accuracy": 0.12363937529578797, |
|
"eval_dim_256_euclidean_accuracy": 0.8764789398958827, |
|
"eval_dim_256_manhattan_accuracy": 0.8814481779460482, |
|
"eval_dim_256_max_accuracy": 0.8814481779460482, |
|
"eval_dim_384_cosine_accuracy": 0.8773071462375769, |
|
"eval_dim_384_dot_accuracy": 0.1226928537624231, |
|
"eval_dim_384_euclidean_accuracy": 0.8773071462375769, |
|
"eval_dim_384_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_384_max_accuracy": 0.8807382867960246, |
|
"eval_dim_64_cosine_accuracy": 0.8726928537624231, |
|
"eval_dim_64_dot_accuracy": 0.1432796971131093, |
|
"eval_dim_64_euclidean_accuracy": 0.869971604353999, |
|
"eval_dim_64_manhattan_accuracy": 0.8795551348793185, |
|
"eval_dim_64_max_accuracy": 0.8795551348793185, |
|
"eval_loss": 16.47365379333496, |
|
"eval_runtime": 104.4255, |
|
"eval_samples_per_second": 80.938, |
|
"eval_sequential_score": 0.8726928537624231, |
|
"eval_steps_per_second": 2.538, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 22.549432057214975, |
|
"grad_norm": 7.925108432769775, |
|
"learning_rate": 1.3331397947420578e-05, |
|
"loss": 15.979, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 22.549432057214975, |
|
"eval_dim_128_cosine_accuracy": 0.8739943208707998, |
|
"eval_dim_128_dot_accuracy": 0.1293185044959773, |
|
"eval_dim_128_euclidean_accuracy": 0.8732844297207761, |
|
"eval_dim_128_manhattan_accuracy": 0.8782536677709418, |
|
"eval_dim_128_max_accuracy": 0.8782536677709418, |
|
"eval_dim_256_cosine_accuracy": 0.8770705158542357, |
|
"eval_dim_256_dot_accuracy": 0.12328442972077615, |
|
"eval_dim_256_euclidean_accuracy": 0.8773071462375769, |
|
"eval_dim_256_manhattan_accuracy": 0.8793185044959773, |
|
"eval_dim_256_max_accuracy": 0.8793185044959773, |
|
"eval_dim_384_cosine_accuracy": 0.8770705158542357, |
|
"eval_dim_384_dot_accuracy": 0.12292948414576432, |
|
"eval_dim_384_euclidean_accuracy": 0.8770705158542357, |
|
"eval_dim_384_manhattan_accuracy": 0.8778987221959299, |
|
"eval_dim_384_max_accuracy": 0.8778987221959299, |
|
"eval_dim_64_cosine_accuracy": 0.8722195929957407, |
|
"eval_dim_64_dot_accuracy": 0.14162328442972077, |
|
"eval_dim_64_euclidean_accuracy": 0.8700899195456696, |
|
"eval_dim_64_manhattan_accuracy": 0.8767155702792239, |
|
"eval_dim_64_max_accuracy": 0.8767155702792239, |
|
"eval_loss": 16.468605041503906, |
|
"eval_runtime": 101.8518, |
|
"eval_samples_per_second": 82.983, |
|
"eval_sequential_score": 0.8722195929957407, |
|
"eval_steps_per_second": 2.602, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 22.885990744636096, |
|
"grad_norm": 6.396854877471924, |
|
"learning_rate": 1.3108109370257714e-05, |
|
"loss": 15.9622, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 22.885990744636096, |
|
"eval_dim_128_cosine_accuracy": 0.8743492664458117, |
|
"eval_dim_128_dot_accuracy": 0.13002839564600094, |
|
"eval_dim_128_euclidean_accuracy": 0.873639375295788, |
|
"eval_dim_128_manhattan_accuracy": 0.8786086133459536, |
|
"eval_dim_128_max_accuracy": 0.8786086133459536, |
|
"eval_dim_256_cosine_accuracy": 0.8760056791292002, |
|
"eval_dim_256_dot_accuracy": 0.12434926644581164, |
|
"eval_dim_256_euclidean_accuracy": 0.8757690487458589, |
|
"eval_dim_256_manhattan_accuracy": 0.8805016564126834, |
|
"eval_dim_256_max_accuracy": 0.8805016564126834, |
|
"eval_dim_384_cosine_accuracy": 0.8764789398958827, |
|
"eval_dim_384_dot_accuracy": 0.12352106010411737, |
|
"eval_dim_384_euclidean_accuracy": 0.8764789398958827, |
|
"eval_dim_384_manhattan_accuracy": 0.8807382867960246, |
|
"eval_dim_384_max_accuracy": 0.8807382867960246, |
|
"eval_dim_64_cosine_accuracy": 0.8721012778040701, |
|
"eval_dim_64_dot_accuracy": 0.14351632749645055, |
|
"eval_dim_64_euclidean_accuracy": 0.8703265499290109, |
|
"eval_dim_64_manhattan_accuracy": 0.8781353525792712, |
|
"eval_dim_64_max_accuracy": 0.8781353525792712, |
|
"eval_loss": 16.473587036132812, |
|
"eval_runtime": 103.4538, |
|
"eval_samples_per_second": 81.698, |
|
"eval_sequential_score": 0.8721012778040701, |
|
"eval_steps_per_second": 2.562, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 23.222549432057214, |
|
"grad_norm": 4.757622241973877, |
|
"learning_rate": 1.288309190272222e-05, |
|
"loss": 15.9881, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 23.222549432057214, |
|
"eval_dim_128_cosine_accuracy": 0.8743492664458117, |
|
"eval_dim_128_dot_accuracy": 0.13097491717936582, |
|
"eval_dim_128_euclidean_accuracy": 0.8737576904874585, |
|
"eval_dim_128_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_128_max_accuracy": 0.879081874112636, |
|
"eval_dim_256_cosine_accuracy": 0.8756507335541883, |
|
"eval_dim_256_dot_accuracy": 0.12588736393752958, |
|
"eval_dim_256_euclidean_accuracy": 0.8747042120208235, |
|
"eval_dim_256_manhattan_accuracy": 0.8795551348793185, |
|
"eval_dim_256_max_accuracy": 0.8795551348793185, |
|
"eval_dim_384_cosine_accuracy": 0.8755324183625177, |
|
"eval_dim_384_dot_accuracy": 0.12446758163748226, |
|
"eval_dim_384_euclidean_accuracy": 0.8755324183625177, |
|
"eval_dim_384_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_384_max_accuracy": 0.879081874112636, |
|
"eval_dim_64_cosine_accuracy": 0.8723379081874113, |
|
"eval_dim_64_dot_accuracy": 0.14375295787979175, |
|
"eval_dim_64_euclidean_accuracy": 0.8700899195456696, |
|
"eval_dim_64_manhattan_accuracy": 0.8788452437292948, |
|
"eval_dim_64_max_accuracy": 0.8788452437292948, |
|
"eval_loss": 16.48019790649414, |
|
"eval_runtime": 104.0826, |
|
"eval_samples_per_second": 81.205, |
|
"eval_sequential_score": 0.8723379081874113, |
|
"eval_steps_per_second": 2.546, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 23.559108119478335, |
|
"grad_norm": 5.279081344604492, |
|
"learning_rate": 1.2656470711108763e-05, |
|
"loss": 15.9482, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.559108119478335, |
|
"eval_dim_128_cosine_accuracy": 0.8724562233790819, |
|
"eval_dim_128_dot_accuracy": 0.13073828679602462, |
|
"eval_dim_128_euclidean_accuracy": 0.8728111689540937, |
|
"eval_dim_128_manhattan_accuracy": 0.8783719829626124, |
|
"eval_dim_128_max_accuracy": 0.8783719829626124, |
|
"eval_dim_256_cosine_accuracy": 0.8761239943208708, |
|
"eval_dim_256_dot_accuracy": 0.1250591575958353, |
|
"eval_dim_256_euclidean_accuracy": 0.8761239943208708, |
|
"eval_dim_256_manhattan_accuracy": 0.8797917652626597, |
|
"eval_dim_256_max_accuracy": 0.8797917652626597, |
|
"eval_dim_384_cosine_accuracy": 0.8761239943208708, |
|
"eval_dim_384_dot_accuracy": 0.1238760056791292, |
|
"eval_dim_384_euclidean_accuracy": 0.8761239943208708, |
|
"eval_dim_384_manhattan_accuracy": 0.8770705158542357, |
|
"eval_dim_384_max_accuracy": 0.8770705158542357, |
|
"eval_dim_64_cosine_accuracy": 0.8710364410790346, |
|
"eval_dim_64_dot_accuracy": 0.143989588263133, |
|
"eval_dim_64_euclidean_accuracy": 0.867841930903928, |
|
"eval_dim_64_manhattan_accuracy": 0.8764789398958827, |
|
"eval_dim_64_max_accuracy": 0.8764789398958827, |
|
"eval_loss": 16.482074737548828, |
|
"eval_runtime": 102.3602, |
|
"eval_samples_per_second": 82.571, |
|
"eval_sequential_score": 0.8710364410790346, |
|
"eval_steps_per_second": 2.589, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.895666806899452, |
|
"grad_norm": 5.999639511108398, |
|
"learning_rate": 1.2428371853785872e-05, |
|
"loss": 15.9228, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 23.895666806899452, |
|
"eval_dim_128_cosine_accuracy": 0.8725745385707525, |
|
"eval_dim_128_dot_accuracy": 0.13310459062943683, |
|
"eval_dim_128_euclidean_accuracy": 0.8719829626123994, |
|
"eval_dim_128_manhattan_accuracy": 0.878490298154283, |
|
"eval_dim_128_max_accuracy": 0.878490298154283, |
|
"eval_dim_256_cosine_accuracy": 0.8748225272124941, |
|
"eval_dim_256_dot_accuracy": 0.12671557027922387, |
|
"eval_dim_256_euclidean_accuracy": 0.8743492664458117, |
|
"eval_dim_256_manhattan_accuracy": 0.879081874112636, |
|
"eval_dim_256_max_accuracy": 0.879081874112636, |
|
"eval_dim_384_cosine_accuracy": 0.8750591575958353, |
|
"eval_dim_384_dot_accuracy": 0.1249408424041647, |
|
"eval_dim_384_euclidean_accuracy": 0.8750591575958353, |
|
"eval_dim_384_manhattan_accuracy": 0.8781353525792712, |
|
"eval_dim_384_max_accuracy": 0.8781353525792712, |
|
"eval_dim_64_cosine_accuracy": 0.870918125887364, |
|
"eval_dim_64_dot_accuracy": 0.14469947941315664, |
|
"eval_dim_64_euclidean_accuracy": 0.8691433980123048, |
|
"eval_dim_64_manhattan_accuracy": 0.8776620918125887, |
|
"eval_dim_64_max_accuracy": 0.8776620918125887, |
|
"eval_loss": 16.499635696411133, |
|
"eval_runtime": 103.3405, |
|
"eval_samples_per_second": 81.788, |
|
"eval_sequential_score": 0.870918125887364, |
|
"eval_steps_per_second": 2.564, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 24.232225494320573, |
|
"grad_norm": 6.511181354522705, |
|
"learning_rate": 1.2198922211075779e-05, |
|
"loss": 15.9418, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 24.232225494320573, |
|
"eval_dim_128_cosine_accuracy": 0.870918125887364, |
|
"eval_dim_128_dot_accuracy": 0.1353525792711784, |
|
"eval_dim_128_euclidean_accuracy": 0.8703265499290109, |
|
"eval_dim_128_manhattan_accuracy": 0.8783719829626124, |
|
"eval_dim_128_max_accuracy": 0.8783719829626124, |
|
"eval_dim_256_cosine_accuracy": 0.8728111689540937, |
|
"eval_dim_256_dot_accuracy": 0.12884524372929484, |
|
"eval_dim_256_euclidean_accuracy": 0.8721012778040701, |
|
"eval_dim_256_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_256_max_accuracy": 0.8794368196876479, |
|
"eval_dim_384_cosine_accuracy": 0.8734027449124467, |
|
"eval_dim_384_dot_accuracy": 0.12659725508755323, |
|
"eval_dim_384_euclidean_accuracy": 0.8734027449124467, |
|
"eval_dim_384_manhattan_accuracy": 0.8794368196876479, |
|
"eval_dim_384_max_accuracy": 0.8794368196876479, |
|
"eval_dim_64_cosine_accuracy": 0.8698532891623284, |
|
"eval_dim_64_dot_accuracy": 0.14564600094652153, |
|
"eval_dim_64_euclidean_accuracy": 0.8680785612872692, |
|
"eval_dim_64_manhattan_accuracy": 0.8770705158542357, |
|
"eval_dim_64_max_accuracy": 0.8770705158542357, |
|
"eval_loss": 16.497343063354492, |
|
"eval_runtime": 104.6868, |
|
"eval_samples_per_second": 80.736, |
|
"eval_sequential_score": 0.8698532891623284, |
|
"eval_steps_per_second": 2.531, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 24.56878418174169, |
|
"grad_norm": 5.682207107543945, |
|
"learning_rate": 1.1968249414677055e-05, |
|
"loss": 15.896, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 24.56878418174169, |
|
"eval_dim_128_cosine_accuracy": 0.8696166587789872, |
|
"eval_dim_128_dot_accuracy": 0.13487931850449597, |
|
"eval_dim_128_euclidean_accuracy": 0.8685518220539518, |
|
"eval_dim_128_manhattan_accuracy": 0.8764789398958827, |
|
"eval_dim_128_max_accuracy": 0.8764789398958827, |
|
"eval_dim_256_cosine_accuracy": 0.8716280170373876, |
|
"eval_dim_256_dot_accuracy": 0.12896355892096545, |
|
"eval_dim_256_euclidean_accuracy": 0.871509701845717, |
|
"eval_dim_256_manhattan_accuracy": 0.8777804070042593, |
|
"eval_dim_256_max_accuracy": 0.8777804070042593, |
|
"eval_dim_384_cosine_accuracy": 0.8726928537624231, |
|
"eval_dim_384_dot_accuracy": 0.1273071462375769, |
|
"eval_dim_384_euclidean_accuracy": 0.8726928537624231, |
|
"eval_dim_384_manhattan_accuracy": 0.8773071462375769, |
|
"eval_dim_384_max_accuracy": 0.8773071462375769, |
|
"eval_dim_64_cosine_accuracy": 0.8685518220539518, |
|
"eval_dim_64_dot_accuracy": 0.14694746805489825, |
|
"eval_dim_64_euclidean_accuracy": 0.8659488878371983, |
|
"eval_dim_64_manhattan_accuracy": 0.8760056791292002, |
|
"eval_dim_64_max_accuracy": 0.8760056791292002, |
|
"eval_loss": 16.498498916625977, |
|
"eval_runtime": 102.6029, |
|
"eval_samples_per_second": 82.376, |
|
"eval_sequential_score": 0.8685518220539518, |
|
"eval_steps_per_second": 2.583, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 24.90534286916281, |
|
"grad_norm": 5.5915117263793945, |
|
"learning_rate": 1.1736481776669307e-05, |
|
"loss": 15.8788, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 24.90534286916281, |
|
"eval_dim_128_cosine_accuracy": 0.8691433980123048, |
|
"eval_dim_128_dot_accuracy": 0.1361807856128727, |
|
"eval_dim_128_euclidean_accuracy": 0.8697349739706578, |
|
"eval_dim_128_manhattan_accuracy": 0.8747042120208235, |
|
"eval_dim_128_max_accuracy": 0.8747042120208235, |
|
"eval_dim_256_cosine_accuracy": 0.871509701845717, |
|
"eval_dim_256_dot_accuracy": 0.13073828679602462, |
|
"eval_dim_256_euclidean_accuracy": 0.8704448651206815, |
|
"eval_dim_256_manhattan_accuracy": 0.8770705158542357, |
|
"eval_dim_256_max_accuracy": 0.8770705158542357, |
|
"eval_dim_384_cosine_accuracy": 0.8717463322290582, |
|
"eval_dim_384_dot_accuracy": 0.1282536677709418, |
|
"eval_dim_384_euclidean_accuracy": 0.8717463322290582, |
|
"eval_dim_384_manhattan_accuracy": 0.8758873639375295, |
|
"eval_dim_384_max_accuracy": 0.8758873639375295, |
|
"eval_dim_64_cosine_accuracy": 0.8661855182205395, |
|
"eval_dim_64_dot_accuracy": 0.14824893516327498, |
|
"eval_dim_64_euclidean_accuracy": 0.8667770941788926, |
|
"eval_dim_64_manhattan_accuracy": 0.8744675816374823, |
|
"eval_dim_64_max_accuracy": 0.8744675816374823, |
|
"eval_loss": 16.517175674438477, |
|
"eval_runtime": 103.5179, |
|
"eval_samples_per_second": 81.648, |
|
"eval_sequential_score": 0.8661855182205395, |
|
"eval_steps_per_second": 2.56, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 25.24190155658393, |
|
"grad_norm": 5.408066749572754, |
|
"learning_rate": 1.150374821813937e-05, |
|
"loss": 15.9147, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 25.24190155658393, |
|
"eval_dim_128_cosine_accuracy": 0.8677236157122574, |
|
"eval_dim_128_dot_accuracy": 0.13724562233790819, |
|
"eval_dim_128_euclidean_accuracy": 0.8673686701372456, |
|
"eval_dim_128_manhattan_accuracy": 0.8768338854708945, |
|
"eval_dim_128_max_accuracy": 0.8768338854708945, |
|
"eval_dim_256_cosine_accuracy": 0.8705631803123521, |
|
"eval_dim_256_dot_accuracy": 0.13144817794604827, |
|
"eval_dim_256_euclidean_accuracy": 0.869971604353999, |
|
"eval_dim_256_manhattan_accuracy": 0.8782536677709418, |
|
"eval_dim_256_max_accuracy": 0.8782536677709418, |
|
"eval_dim_384_cosine_accuracy": 0.8711547562707052, |
|
"eval_dim_384_dot_accuracy": 0.12884524372929484, |
|
"eval_dim_384_euclidean_accuracy": 0.8711547562707052, |
|
"eval_dim_384_manhattan_accuracy": 0.8782536677709418, |
|
"eval_dim_384_max_accuracy": 0.8782536677709418, |
|
"eval_dim_64_cosine_accuracy": 0.8661855182205395, |
|
"eval_dim_64_dot_accuracy": 0.14955040227165167, |
|
"eval_dim_64_euclidean_accuracy": 0.866658778987222, |
|
"eval_dim_64_manhattan_accuracy": 0.8768338854708945, |
|
"eval_dim_64_max_accuracy": 0.8768338854708945, |
|
"eval_loss": 16.506189346313477, |
|
"eval_runtime": 103.9114, |
|
"eval_samples_per_second": 81.339, |
|
"eval_sequential_score": 0.8661855182205395, |
|
"eval_steps_per_second": 2.55, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 25.578460244005047, |
|
"grad_norm": 6.964442253112793, |
|
"learning_rate": 1.1270178197468788e-05, |
|
"loss": 15.857, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 25.578460244005047, |
|
"eval_dim_128_cosine_accuracy": 0.8683151916706106, |
|
"eval_dim_128_dot_accuracy": 0.13712730714623758, |
|
"eval_dim_128_euclidean_accuracy": 0.86819687647894, |
|
"eval_dim_128_manhattan_accuracy": 0.8739943208707998, |
|
"eval_dim_128_max_accuracy": 0.8739943208707998, |
|
"eval_dim_256_cosine_accuracy": 0.8717463322290582, |
|
"eval_dim_256_dot_accuracy": 0.13073828679602462, |
|
"eval_dim_256_euclidean_accuracy": 0.871509701845717, |
|
"eval_dim_256_manhattan_accuracy": 0.8761239943208708, |
|
"eval_dim_256_max_accuracy": 0.8761239943208708, |
|
"eval_dim_384_cosine_accuracy": 0.8731661145291055, |
|
"eval_dim_384_dot_accuracy": 0.12683388547089447, |
|
"eval_dim_384_euclidean_accuracy": 0.8731661145291055, |
|
"eval_dim_384_manhattan_accuracy": 0.8755324183625177, |
|
"eval_dim_384_max_accuracy": 0.8755324183625177, |
|
"eval_dim_64_cosine_accuracy": 0.8663038334122102, |
|
"eval_dim_64_dot_accuracy": 0.1499053478466635, |
|
"eval_dim_64_euclidean_accuracy": 0.865120681495504, |
|
"eval_dim_64_manhattan_accuracy": 0.8748225272124941, |
|
"eval_dim_64_max_accuracy": 0.8748225272124941, |
|
"eval_loss": 16.505783081054688, |
|
"eval_runtime": 102.7207, |
|
"eval_samples_per_second": 82.281, |
|
"eval_sequential_score": 0.8663038334122102, |
|
"eval_steps_per_second": 2.58, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 25.915018931426168, |
|
"grad_norm": 17.978727340698242, |
|
"learning_rate": 1.1035901638322392e-05, |
|
"loss": 15.8291, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 25.915018931426168, |
|
"eval_dim_128_cosine_accuracy": 0.8673686701372456, |
|
"eval_dim_128_dot_accuracy": 0.13771888310459063, |
|
"eval_dim_128_euclidean_accuracy": 0.8665404637955514, |
|
"eval_dim_128_manhattan_accuracy": 0.8745858968291529, |
|
"eval_dim_128_max_accuracy": 0.8745858968291529, |
|
"eval_dim_256_cosine_accuracy": 0.8702082347373403, |
|
"eval_dim_256_dot_accuracy": 0.1320397539044013, |
|
"eval_dim_256_euclidean_accuracy": 0.868788452437293, |
|
"eval_dim_256_manhattan_accuracy": 0.8756507335541883, |
|
"eval_dim_256_max_accuracy": 0.8756507335541883, |
|
"eval_dim_384_cosine_accuracy": 0.8705631803123521, |
|
"eval_dim_384_dot_accuracy": 0.1294368196876479, |
|
"eval_dim_384_euclidean_accuracy": 0.8705631803123521, |
|
"eval_dim_384_manhattan_accuracy": 0.8762423095125415, |
|
"eval_dim_384_max_accuracy": 0.8762423095125415, |
|
"eval_dim_64_cosine_accuracy": 0.8644107903454804, |
|
"eval_dim_64_dot_accuracy": 0.15309985802177, |
|
"eval_dim_64_euclidean_accuracy": 0.8640558447704685, |
|
"eval_dim_64_manhattan_accuracy": 0.8731661145291055, |
|
"eval_dim_64_max_accuracy": 0.8731661145291055, |
|
"eval_loss": 16.520679473876953, |
|
"eval_runtime": 104.1552, |
|
"eval_samples_per_second": 81.148, |
|
"eval_sequential_score": 0.8644107903454804, |
|
"eval_steps_per_second": 2.544, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 26.251577618847286, |
|
"grad_norm": 7.759204387664795, |
|
"learning_rate": 1.080104885737807e-05, |
|
"loss": 15.8802, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 26.251577618847286, |
|
"eval_dim_128_cosine_accuracy": 0.867841930903928, |
|
"eval_dim_128_dot_accuracy": 0.13913866540463796, |
|
"eval_dim_128_euclidean_accuracy": 0.86819687647894, |
|
"eval_dim_128_manhattan_accuracy": 0.8750591575958353, |
|
"eval_dim_128_max_accuracy": 0.8750591575958353, |
|
"eval_dim_256_cosine_accuracy": 0.8697349739706578, |
|
"eval_dim_256_dot_accuracy": 0.1318031235210601, |
|
"eval_dim_256_euclidean_accuracy": 0.8697349739706578, |
|
"eval_dim_256_manhattan_accuracy": 0.8764789398958827, |
|
"eval_dim_256_max_accuracy": 0.8764789398958827, |
|
"eval_dim_384_cosine_accuracy": 0.8713913866540464, |
|
"eval_dim_384_dot_accuracy": 0.1286086133459536, |
|
"eval_dim_384_euclidean_accuracy": 0.8713913866540464, |
|
"eval_dim_384_manhattan_accuracy": 0.8762423095125415, |
|
"eval_dim_384_max_accuracy": 0.8762423095125415, |
|
"eval_dim_64_cosine_accuracy": 0.8664221486038808, |
|
"eval_dim_64_dot_accuracy": 0.15061523899668716, |
|
"eval_dim_64_euclidean_accuracy": 0.8655939422621864, |
|
"eval_dim_64_manhattan_accuracy": 0.8737576904874585, |
|
"eval_dim_64_max_accuracy": 0.8737576904874585, |
|
"eval_loss": 16.52326011657715, |
|
"eval_runtime": 103.0214, |
|
"eval_samples_per_second": 82.041, |
|
"eval_sequential_score": 0.8664221486038808, |
|
"eval_steps_per_second": 2.572, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 26.588136306268407, |
|
"grad_norm": 6.951057434082031, |
|
"learning_rate": 1.0565750491837925e-05, |
|
"loss": 15.846, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 26.588136306268407, |
|
"eval_dim_128_cosine_accuracy": 0.8685518220539518, |
|
"eval_dim_128_dot_accuracy": 0.13689067676289635, |
|
"eval_dim_128_euclidean_accuracy": 0.867841930903928, |
|
"eval_dim_128_manhattan_accuracy": 0.8729294841457643, |
|
"eval_dim_128_max_accuracy": 0.8729294841457643, |
|
"eval_dim_256_cosine_accuracy": 0.8712730714623758, |
|
"eval_dim_256_dot_accuracy": 0.13097491717936582, |
|
"eval_dim_256_euclidean_accuracy": 0.8704448651206815, |
|
"eval_dim_256_manhattan_accuracy": 0.8765972550875533, |
|
"eval_dim_256_max_accuracy": 0.8765972550875533, |
|
"eval_dim_384_cosine_accuracy": 0.8717463322290582, |
|
"eval_dim_384_dot_accuracy": 0.1282536677709418, |
|
"eval_dim_384_euclidean_accuracy": 0.8717463322290582, |
|
"eval_dim_384_manhattan_accuracy": 0.8741126360624705, |
|
"eval_dim_384_max_accuracy": 0.8741126360624705, |
|
"eval_dim_64_cosine_accuracy": 0.8654756270705158, |
|
"eval_dim_64_dot_accuracy": 0.1499053478466635, |
|
"eval_dim_64_euclidean_accuracy": 0.865120681495504, |
|
"eval_dim_64_manhattan_accuracy": 0.8729294841457643, |
|
"eval_dim_64_max_accuracy": 0.8729294841457643, |
|
"eval_loss": 16.517038345336914, |
|
"eval_runtime": 103.0824, |
|
"eval_samples_per_second": 81.993, |
|
"eval_sequential_score": 0.8654756270705158, |
|
"eval_steps_per_second": 2.571, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 26.924694993689524, |
|
"grad_norm": 7.398913860321045, |
|
"learning_rate": 1.0330137426761136e-05, |
|
"loss": 15.8012, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 26.924694993689524, |
|
"eval_dim_128_cosine_accuracy": 0.8663038334122102, |
|
"eval_dim_128_dot_accuracy": 0.1386654046379555, |
|
"eval_dim_128_euclidean_accuracy": 0.865712257453857, |
|
"eval_dim_128_manhattan_accuracy": 0.8742309512541411, |
|
"eval_dim_128_max_accuracy": 0.8742309512541411, |
|
"eval_dim_256_cosine_accuracy": 0.86819687647894, |
|
"eval_dim_256_dot_accuracy": 0.1319214387127307, |
|
"eval_dim_256_euclidean_accuracy": 0.8680785612872692, |
|
"eval_dim_256_manhattan_accuracy": 0.8762423095125415, |
|
"eval_dim_256_max_accuracy": 0.8762423095125415, |
|
"eval_dim_384_cosine_accuracy": 0.8698532891623284, |
|
"eval_dim_384_dot_accuracy": 0.13014671083767157, |
|
"eval_dim_384_euclidean_accuracy": 0.8698532891623284, |
|
"eval_dim_384_manhattan_accuracy": 0.8737576904874585, |
|
"eval_dim_384_max_accuracy": 0.8737576904874585, |
|
"eval_dim_64_cosine_accuracy": 0.8634642688121155, |
|
"eval_dim_64_dot_accuracy": 0.1508518693800284, |
|
"eval_dim_64_euclidean_accuracy": 0.8637008991954567, |
|
"eval_dim_64_manhattan_accuracy": 0.8728111689540937, |
|
"eval_dim_64_max_accuracy": 0.8728111689540937, |
|
"eval_loss": 16.53356170654297, |
|
"eval_runtime": 104.1737, |
|
"eval_samples_per_second": 81.134, |
|
"eval_sequential_score": 0.8634642688121155, |
|
"eval_steps_per_second": 2.544, |
|
"step": 4000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 7400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|