|
{ |
|
"best_metric": 0.8188073394495413, |
|
"best_model_checkpoint": "tiny-bert-sst2/run-3/checkpoint-1000", |
|
"epoch": 2.846299810246679, |
|
"eval_steps": 100, |
|
"global_step": 1500, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18975332068311196, |
|
"grad_norm": 7.5281982421875, |
|
"learning_rate": 0.00022048666969254715, |
|
"loss": 1.6888, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18975332068311196, |
|
"eval_accuracy": 0.7740825688073395, |
|
"eval_loss": 1.2137372493743896, |
|
"eval_runtime": 2.512, |
|
"eval_samples_per_second": 347.137, |
|
"eval_steps_per_second": 2.787, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3795066413662239, |
|
"grad_norm": 8.994091033935547, |
|
"learning_rate": 0.00022849311940488983, |
|
"loss": 1.0387, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3795066413662239, |
|
"eval_accuracy": 0.7958715596330275, |
|
"eval_loss": 1.0297660827636719, |
|
"eval_runtime": 2.4479, |
|
"eval_samples_per_second": 356.218, |
|
"eval_steps_per_second": 2.86, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5692599620493358, |
|
"grad_norm": 4.1480183601379395, |
|
"learning_rate": 0.00022592984315022126, |
|
"loss": 0.7891, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5692599620493358, |
|
"eval_accuracy": 0.7935779816513762, |
|
"eval_loss": 0.9698419570922852, |
|
"eval_runtime": 2.4189, |
|
"eval_samples_per_second": 360.492, |
|
"eval_steps_per_second": 2.894, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7590132827324478, |
|
"grad_norm": 6.172699451446533, |
|
"learning_rate": 0.00022165433603986336, |
|
"loss": 0.6767, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7590132827324478, |
|
"eval_accuracy": 0.8222477064220184, |
|
"eval_loss": 0.9135227799415588, |
|
"eval_runtime": 2.4368, |
|
"eval_samples_per_second": 357.851, |
|
"eval_steps_per_second": 2.873, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9487666034155597, |
|
"grad_norm": 6.589296340942383, |
|
"learning_rate": 0.00021573238588267377, |
|
"loss": 0.617, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9487666034155597, |
|
"eval_accuracy": 0.8073394495412844, |
|
"eval_loss": 0.9335416555404663, |
|
"eval_runtime": 2.4972, |
|
"eval_samples_per_second": 349.185, |
|
"eval_steps_per_second": 2.803, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1385199240986716, |
|
"grad_norm": 4.637527942657471, |
|
"learning_rate": 0.00020825511453038828, |
|
"loss": 0.5142, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.1385199240986716, |
|
"eval_accuracy": 0.8073394495412844, |
|
"eval_loss": 1.0174880027770996, |
|
"eval_runtime": 2.431, |
|
"eval_samples_per_second": 358.694, |
|
"eval_steps_per_second": 2.879, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.3282732447817835, |
|
"grad_norm": 3.807858467102051, |
|
"learning_rate": 0.00019933757577330664, |
|
"loss": 0.4511, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.3282732447817835, |
|
"eval_accuracy": 0.8142201834862385, |
|
"eval_loss": 0.9702788591384888, |
|
"eval_runtime": 2.4299, |
|
"eval_samples_per_second": 358.858, |
|
"eval_steps_per_second": 2.881, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.5180265654648957, |
|
"grad_norm": 5.257256984710693, |
|
"learning_rate": 0.000189116984991988, |
|
"loss": 0.4292, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.5180265654648957, |
|
"eval_accuracy": 0.8027522935779816, |
|
"eval_loss": 1.0670255422592163, |
|
"eval_runtime": 2.4833, |
|
"eval_samples_per_second": 351.149, |
|
"eval_steps_per_second": 2.819, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7077798861480076, |
|
"grad_norm": 4.167674541473389, |
|
"learning_rate": 0.0001777506078055455, |
|
"loss": 0.4201, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.7077798861480076, |
|
"eval_accuracy": 0.8165137614678899, |
|
"eval_loss": 0.966061532497406, |
|
"eval_runtime": 2.4276, |
|
"eval_samples_per_second": 359.207, |
|
"eval_steps_per_second": 2.884, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8975332068311195, |
|
"grad_norm": 7.251418590545654, |
|
"learning_rate": 0.00016541334020419587, |
|
"loss": 0.4173, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.8975332068311195, |
|
"eval_accuracy": 0.8188073394495413, |
|
"eval_loss": 0.9646121263504028, |
|
"eval_runtime": 2.4466, |
|
"eval_samples_per_second": 356.409, |
|
"eval_steps_per_second": 2.861, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0872865275142316, |
|
"grad_norm": 3.3101117610931396, |
|
"learning_rate": 0.0001522950174008934, |
|
"loss": 0.3537, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.0872865275142316, |
|
"eval_accuracy": 0.8165137614678899, |
|
"eval_loss": 1.00448477268219, |
|
"eval_runtime": 2.4388, |
|
"eval_samples_per_second": 357.559, |
|
"eval_steps_per_second": 2.87, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.2770398481973433, |
|
"grad_norm": 4.73748254776001, |
|
"learning_rate": 0.00013859749281111374, |
|
"loss": 0.3095, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.2770398481973433, |
|
"eval_accuracy": 0.8153669724770642, |
|
"eval_loss": 1.0533220767974854, |
|
"eval_runtime": 2.4451, |
|
"eval_samples_per_second": 356.635, |
|
"eval_steps_per_second": 2.863, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.4667931688804554, |
|
"grad_norm": 2.5525803565979004, |
|
"learning_rate": 0.00012453153210692205, |
|
"loss": 0.3254, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.4667931688804554, |
|
"eval_accuracy": 0.8130733944954128, |
|
"eval_loss": 1.1290571689605713, |
|
"eval_runtime": 2.463, |
|
"eval_samples_per_second": 354.04, |
|
"eval_steps_per_second": 2.842, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.656546489563567, |
|
"grad_norm": 5.130295276641846, |
|
"learning_rate": 0.00011031357013693877, |
|
"loss": 0.3135, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.656546489563567, |
|
"eval_accuracy": 0.8073394495412844, |
|
"eval_loss": 1.0808497667312622, |
|
"eval_runtime": 2.434, |
|
"eval_samples_per_second": 358.263, |
|
"eval_steps_per_second": 2.876, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.846299810246679, |
|
"grad_norm": 2.434037685394287, |
|
"learning_rate": 9.616238061391467e-05, |
|
"loss": 0.3162, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.846299810246679, |
|
"eval_accuracy": 0.8096330275229358, |
|
"eval_loss": 1.1154645681381226, |
|
"eval_runtime": 2.4873, |
|
"eval_samples_per_second": 350.575, |
|
"eval_steps_per_second": 2.814, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2635, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 23032282372320.0, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.7363133901666971, |
|
"learning_rate": 0.00022930613648024903, |
|
"num_train_epochs": 5, |
|
"temperature": 13, |
|
"warmup_ratio": 0.03930811279168249 |
|
} |
|
} |
|
|