{
  "best_metric": 0.8268348623853211,
  "best_model_checkpoint": "tiny-bert-sst2/run-1/checkpoint-1000",
  "epoch": 1.8975332068311195,
  "eval_steps": 100,
  "global_step": 1000,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18975332068311196,
      "grad_norm": 12.37441349029541,
      "learning_rate": 0.0002663036500532049,
      "loss": 3.6173,
      "step": 100
    },
    {
      "epoch": 0.18975332068311196,
      "eval_accuracy": 0.783256880733945,
      "eval_loss": 2.335937738418579,
      "eval_runtime": 2.5245,
      "eval_samples_per_second": 345.413,
      "eval_steps_per_second": 2.773,
      "step": 100
    },
    {
      "epoch": 0.3795066413662239,
      "grad_norm": 27.809226989746094,
      "learning_rate": 0.00025130149624925485,
      "loss": 1.9873,
      "step": 200
    },
    {
      "epoch": 0.3795066413662239,
      "eval_accuracy": 0.7889908256880734,
      "eval_loss": 2.189484119415283,
      "eval_runtime": 2.4106,
      "eval_samples_per_second": 361.739,
      "eval_steps_per_second": 2.904,
      "step": 200
    },
    {
      "epoch": 0.5692599620493358,
      "grad_norm": 11.954730033874512,
      "learning_rate": 0.00022545996685162219,
      "loss": 1.5216,
      "step": 300
    },
    {
      "epoch": 0.5692599620493358,
      "eval_accuracy": 0.8038990825688074,
      "eval_loss": 1.8991937637329102,
      "eval_runtime": 2.4589,
      "eval_samples_per_second": 354.636,
      "eval_steps_per_second": 2.847,
      "step": 300
    },
    {
      "epoch": 0.7590132827324478,
      "grad_norm": 9.482739448547363,
      "learning_rate": 0.00019118302272170003,
      "loss": 1.2949,
      "step": 400
    },
    {
      "epoch": 0.7590132827324478,
      "eval_accuracy": 0.8176605504587156,
      "eval_loss": 1.869120717048645,
      "eval_runtime": 2.4475,
      "eval_samples_per_second": 356.276,
      "eval_steps_per_second": 2.86,
      "step": 400
    },
    {
      "epoch": 0.9487666034155597,
      "grad_norm": 9.123330116271973,
      "learning_rate": 0.0001516593463429656,
      "loss": 1.1952,
      "step": 500
    },
    {
      "epoch": 0.9487666034155597,
      "eval_accuracy": 0.8096330275229358,
      "eval_loss": 1.8021105527877808,
      "eval_runtime": 2.432,
      "eval_samples_per_second": 358.555,
      "eval_steps_per_second": 2.878,
      "step": 500
    },
    {
      "epoch": 1.1385199240986716,
      "grad_norm": 11.057866096496582,
      "learning_rate": 0.00011056570814378025,
      "loss": 0.9849,
      "step": 600
    },
    {
      "epoch": 1.1385199240986716,
      "eval_accuracy": 0.8211009174311926,
      "eval_loss": 1.830503225326538,
      "eval_runtime": 2.5073,
      "eval_samples_per_second": 347.789,
      "eval_steps_per_second": 2.792,
      "step": 600
    },
    {
      "epoch": 1.3282732447817835,
      "grad_norm": 11.545559883117676,
      "learning_rate": 7.172492744582635e-05,
      "loss": 0.8491,
      "step": 700
    },
    {
      "epoch": 1.3282732447817835,
      "eval_accuracy": 0.823394495412844,
      "eval_loss": 1.8194243907928467,
      "eval_runtime": 2.4333,
      "eval_samples_per_second": 358.362,
      "eval_steps_per_second": 2.877,
      "step": 700
    },
    {
      "epoch": 1.5180265654648957,
      "grad_norm": 12.223103523254395,
      "learning_rate": 3.8750246917023896e-05,
      "loss": 0.8116,
      "step": 800
    },
    {
      "epoch": 1.5180265654648957,
      "eval_accuracy": 0.8188073394495413,
      "eval_loss": 1.8401293754577637,
      "eval_runtime": 2.4562,
      "eval_samples_per_second": 355.021,
      "eval_steps_per_second": 2.85,
      "step": 800
    },
    {
      "epoch": 1.7077798861480076,
      "grad_norm": 15.843563079833984,
      "learning_rate": 1.4709203319165587e-05,
      "loss": 0.7934,
      "step": 900
    },
    {
      "epoch": 1.7077798861480076,
      "eval_accuracy": 0.8268348623853211,
      "eval_loss": 1.8587218523025513,
      "eval_runtime": 2.4847,
      "eval_samples_per_second": 350.954,
      "eval_steps_per_second": 2.817,
      "step": 900
    },
    {
      "epoch": 1.8975332068311195,
      "grad_norm": 18.251996994018555,
      "learning_rate": 1.838263658015889e-06,
      "loss": 0.7867,
      "step": 1000
    },
    {
      "epoch": 1.8975332068311195,
      "eval_accuracy": 0.8268348623853211,
      "eval_loss": 1.843582034111023,
      "eval_runtime": 2.4417,
      "eval_samples_per_second": 357.126,
      "eval_steps_per_second": 2.867,
      "step": 1000
    }
  ],
  "logging_steps": 100,
  "max_steps": 1054,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 15348240276300.0,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.03937708917768734,
    "learning_rate": 0.00026956588481964056,
    "num_train_epochs": 2,
    "temperature": 9,
    "warmup_ratio": 0.026036675250872283
  }
}