|
{
  "best_metric": 0.5128149390220642,
  "best_model_checkpoint": "/mnt/beegfs/farid/mlora/outputs/xnli/aya-101/hi/rank4_lr5e-5/checkpoint-6000",
  "epoch": 0.24445893089960888,
  "eval_steps": 500,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020371577574967405,
      "grad_norm": 1.9568703174591064,
      "learning_rate": 4.166666666666667e-05,
      "loss": 1.1534,
      "step": 500
    },
    {
      "epoch": 0.020371577574967405,
      "eval_accuracy": 0.37991967871485943,
      "eval_f1": 0.3396581059481989,
      "eval_loss": 1.0917378664016724,
      "eval_runtime": 413.6608,
      "eval_samples_per_second": 6.019,
      "eval_steps_per_second": 0.377,
      "step": 500
    },
    {
      "epoch": 0.04074315514993481,
      "grad_norm": 9.426871299743652,
      "learning_rate": 4.62962962962963e-05,
      "loss": 1.0218,
      "step": 1000
    },
    {
      "epoch": 0.04074315514993481,
      "eval_accuracy": 0.6409638554216868,
      "eval_f1": 0.6436728838823977,
      "eval_loss": 0.8600370287895203,
      "eval_runtime": 412.0934,
      "eval_samples_per_second": 6.042,
      "eval_steps_per_second": 0.379,
      "step": 1000
    },
    {
      "epoch": 0.06111473272490222,
      "grad_norm": 20.413841247558594,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.8582,
      "step": 1500
    },
    {
      "epoch": 0.06111473272490222,
      "eval_accuracy": 0.7369477911646586,
      "eval_f1": 0.7370571656251429,
      "eval_loss": 0.6784067749977112,
      "eval_runtime": 431.3062,
      "eval_samples_per_second": 5.773,
      "eval_steps_per_second": 0.362,
      "step": 1500
    },
    {
      "epoch": 0.08148631029986962,
      "grad_norm": 8.49395751953125,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.7793,
      "step": 2000
    },
    {
      "epoch": 0.08148631029986962,
      "eval_accuracy": 0.7682730923694779,
      "eval_f1": 0.7693110628872669,
      "eval_loss": 0.611162006855011,
      "eval_runtime": 431.4602,
      "eval_samples_per_second": 5.771,
      "eval_steps_per_second": 0.362,
      "step": 2000
    },
    {
      "epoch": 0.10185788787483703,
      "grad_norm": 7.8166375160217285,
      "learning_rate": 3.240740740740741e-05,
      "loss": 0.7563,
      "step": 2500
    },
    {
      "epoch": 0.10185788787483703,
      "eval_accuracy": 0.7871485943775101,
      "eval_f1": 0.7881188800855449,
      "eval_loss": 0.5777685046195984,
      "eval_runtime": 411.445,
      "eval_samples_per_second": 6.052,
      "eval_steps_per_second": 0.379,
      "step": 2500
    },
    {
      "epoch": 0.12222946544980444,
      "grad_norm": 11.658799171447754,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.7191,
      "step": 3000
    },
    {
      "epoch": 0.12222946544980444,
      "eval_accuracy": 0.7971887550200804,
      "eval_f1": 0.7971744387340783,
      "eval_loss": 0.5552772283554077,
      "eval_runtime": 411.1455,
      "eval_samples_per_second": 6.056,
      "eval_steps_per_second": 0.379,
      "step": 3000
    },
    {
      "epoch": 0.14260104302477183,
      "grad_norm": 8.530998229980469,
      "learning_rate": 2.314814814814815e-05,
      "loss": 0.7052,
      "step": 3500
    },
    {
      "epoch": 0.14260104302477183,
      "eval_accuracy": 0.8016064257028113,
      "eval_f1": 0.8021556716106938,
      "eval_loss": 0.5359864234924316,
      "eval_runtime": 411.8163,
      "eval_samples_per_second": 6.046,
      "eval_steps_per_second": 0.379,
      "step": 3500
    },
    {
      "epoch": 0.16297262059973924,
      "grad_norm": 6.73158597946167,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.7079,
      "step": 4000
    },
    {
      "epoch": 0.16297262059973924,
      "eval_accuracy": 0.802008032128514,
      "eval_f1": 0.8023252480910511,
      "eval_loss": 0.5278254747390747,
      "eval_runtime": 411.3361,
      "eval_samples_per_second": 6.053,
      "eval_steps_per_second": 0.379,
      "step": 4000
    },
    {
      "epoch": 0.18334419817470665,
      "grad_norm": 9.069704055786133,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.6775,
      "step": 4500
    },
    {
      "epoch": 0.18334419817470665,
      "eval_accuracy": 0.8036144578313253,
      "eval_f1": 0.8043620274591458,
      "eval_loss": 0.5234741568565369,
      "eval_runtime": 431.5576,
      "eval_samples_per_second": 5.77,
      "eval_steps_per_second": 0.361,
      "step": 4500
    },
    {
      "epoch": 0.20371577574967406,
      "grad_norm": 9.322051048278809,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.6977,
      "step": 5000
    },
    {
      "epoch": 0.20371577574967406,
      "eval_accuracy": 0.8088353413654619,
      "eval_f1": 0.8089545270025741,
      "eval_loss": 0.5214890837669373,
      "eval_runtime": 412.4688,
      "eval_samples_per_second": 6.037,
      "eval_steps_per_second": 0.378,
      "step": 5000
    },
    {
      "epoch": 0.22408735332464147,
      "grad_norm": 5.2376508712768555,
      "learning_rate": 4.6296296296296296e-06,
      "loss": 0.6834,
      "step": 5500
    },
    {
      "epoch": 0.22408735332464147,
      "eval_accuracy": 0.8076305220883534,
      "eval_f1": 0.8082794716875278,
      "eval_loss": 0.5130343437194824,
      "eval_runtime": 411.1989,
      "eval_samples_per_second": 6.055,
      "eval_steps_per_second": 0.379,
      "step": 5500
    },
    {
      "epoch": 0.24445893089960888,
      "grad_norm": 9.288165092468262,
      "learning_rate": 0.0,
      "loss": 0.6639,
      "step": 6000
    },
    {
      "epoch": 0.24445893089960888,
      "eval_accuracy": 0.8080321285140563,
      "eval_f1": 0.8086136034722085,
      "eval_loss": 0.5128149390220642,
      "eval_runtime": 411.1581,
      "eval_samples_per_second": 6.056,
      "eval_steps_per_second": 0.379,
      "step": 6000
    }
  ],
  "logging_steps": 500,
  "max_steps": 6000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 8.03166870528e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|