|
{ |
|
"best_metric": 0.619316577911377, |
|
"best_model_checkpoint": "./vit-base-beans/checkpoint-400", |
|
"epoch": 3.883495145631068, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019805825242718447, |
|
"loss": 2.2781, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019611650485436895, |
|
"loss": 2.0539, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.6347826086956522, |
|
"eval_loss": 1.684175968170166, |
|
"eval_runtime": 199.797, |
|
"eval_samples_per_second": 2.302, |
|
"eval_steps_per_second": 0.29, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001941747572815534, |
|
"loss": 1.7246, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019223300970873787, |
|
"loss": 1.6027, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019029126213592236, |
|
"loss": 1.4159, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.6934782608695652, |
|
"eval_loss": 1.2154264450073242, |
|
"eval_runtime": 14.461, |
|
"eval_samples_per_second": 31.81, |
|
"eval_steps_per_second": 4.011, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018834951456310681, |
|
"loss": 1.243, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00018640776699029127, |
|
"loss": 1.2138, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.741304347826087, |
|
"eval_loss": 1.0337247848510742, |
|
"eval_runtime": 14.5332, |
|
"eval_samples_per_second": 31.652, |
|
"eval_steps_per_second": 3.991, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00018446601941747576, |
|
"loss": 1.096, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00018252427184466022, |
|
"loss": 1.012, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00018058252427184467, |
|
"loss": 0.8889, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.7652173913043478, |
|
"eval_loss": 0.845034122467041, |
|
"eval_runtime": 14.4751, |
|
"eval_samples_per_second": 31.779, |
|
"eval_steps_per_second": 4.007, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00017864077669902913, |
|
"loss": 0.7514, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00017669902912621362, |
|
"loss": 0.7239, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.6869565217391305, |
|
"eval_loss": 1.0301238298416138, |
|
"eval_runtime": 13.8604, |
|
"eval_samples_per_second": 33.188, |
|
"eval_steps_per_second": 4.185, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017475728155339805, |
|
"loss": 0.6558, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00017281553398058253, |
|
"loss": 0.6192, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.000170873786407767, |
|
"loss": 0.6241, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.7869565217391304, |
|
"eval_loss": 0.7646523118019104, |
|
"eval_runtime": 13.9259, |
|
"eval_samples_per_second": 33.032, |
|
"eval_steps_per_second": 4.165, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00016893203883495145, |
|
"loss": 0.6511, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00016699029126213594, |
|
"loss": 0.7607, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy": 0.7956521739130434, |
|
"eval_loss": 0.7208316922187805, |
|
"eval_runtime": 15.3767, |
|
"eval_samples_per_second": 29.915, |
|
"eval_steps_per_second": 3.772, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001650485436893204, |
|
"loss": 0.6508, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00016310679611650485, |
|
"loss": 0.5474, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001611650485436893, |
|
"loss": 0.5841, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.7913043478260869, |
|
"eval_loss": 0.6974421143531799, |
|
"eval_runtime": 13.8536, |
|
"eval_samples_per_second": 33.204, |
|
"eval_steps_per_second": 4.187, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0001592233009708738, |
|
"loss": 0.5992, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00015728155339805825, |
|
"loss": 0.3827, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_accuracy": 0.7869565217391304, |
|
"eval_loss": 0.6890577077865601, |
|
"eval_runtime": 14.5884, |
|
"eval_samples_per_second": 31.532, |
|
"eval_steps_per_second": 3.976, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0001553398058252427, |
|
"loss": 0.4169, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0001533980582524272, |
|
"loss": 0.3825, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00015145631067961166, |
|
"loss": 0.3534, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.6881299614906311, |
|
"eval_runtime": 14.5452, |
|
"eval_samples_per_second": 31.626, |
|
"eval_steps_per_second": 3.988, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00014951456310679611, |
|
"loss": 0.2475, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0001475728155339806, |
|
"loss": 0.438, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_accuracy": 0.7869565217391304, |
|
"eval_loss": 0.794903039932251, |
|
"eval_runtime": 14.6276, |
|
"eval_samples_per_second": 31.447, |
|
"eval_steps_per_second": 3.965, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.00014563106796116506, |
|
"loss": 0.3039, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.00014368932038834952, |
|
"loss": 0.2605, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.000141747572815534, |
|
"loss": 0.4453, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.7804347826086957, |
|
"eval_loss": 0.7851635217666626, |
|
"eval_runtime": 14.6262, |
|
"eval_samples_per_second": 31.45, |
|
"eval_steps_per_second": 3.965, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.00013980582524271846, |
|
"loss": 0.3196, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.00013786407766990292, |
|
"loss": 0.1714, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_accuracy": 0.7956521739130434, |
|
"eval_loss": 0.7182856798171997, |
|
"eval_runtime": 16.1817, |
|
"eval_samples_per_second": 28.427, |
|
"eval_steps_per_second": 3.584, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0001359223300970874, |
|
"loss": 0.1467, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00013398058252427186, |
|
"loss": 0.1825, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.00013203883495145632, |
|
"loss": 0.1664, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.6710854172706604, |
|
"eval_runtime": 14.7551, |
|
"eval_samples_per_second": 31.176, |
|
"eval_steps_per_second": 3.931, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.00013009708737864078, |
|
"loss": 0.1266, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.00012815533980582526, |
|
"loss": 0.1962, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_accuracy": 0.8043478260869565, |
|
"eval_loss": 0.7439975738525391, |
|
"eval_runtime": 14.5665, |
|
"eval_samples_per_second": 31.579, |
|
"eval_steps_per_second": 3.982, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.00012621359223300972, |
|
"loss": 0.1609, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.00012427184466019418, |
|
"loss": 0.1891, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00012233009708737864, |
|
"loss": 0.1961, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_accuracy": 0.8391304347826087, |
|
"eval_loss": 0.619316577911377, |
|
"eval_runtime": 14.1432, |
|
"eval_samples_per_second": 32.525, |
|
"eval_steps_per_second": 4.101, |
|
"step": 400 |
|
} |
|
], |
|
"max_steps": 1030, |
|
"num_train_epochs": 10, |
|
"total_flos": 4.929663074408755e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|