ryusangwon's picture
Model save
fa51dfe verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.37230081906180196,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.98758997269794e-05,
"loss": 1.8076,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 4.97517994539588e-05,
"loss": 1.792,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 4.96276991809382e-05,
"loss": 1.7935,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 4.95035989079176e-05,
"loss": 1.7432,
"step": 40
},
{
"epoch": 0.04,
"learning_rate": 4.9379498634897e-05,
"loss": 1.7369,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 4.92553983618764e-05,
"loss": 1.7506,
"step": 60
},
{
"epoch": 0.05,
"learning_rate": 4.91312980888558e-05,
"loss": 1.6795,
"step": 70
},
{
"epoch": 0.06,
"learning_rate": 4.90071978158352e-05,
"loss": 1.721,
"step": 80
},
{
"epoch": 0.07,
"learning_rate": 4.88830975428146e-05,
"loss": 1.6954,
"step": 90
},
{
"epoch": 0.07,
"learning_rate": 4.875899726979399e-05,
"loss": 1.6715,
"step": 100
},
{
"epoch": 0.08,
"learning_rate": 4.86348969967734e-05,
"loss": 1.6799,
"step": 110
},
{
"epoch": 0.09,
"learning_rate": 4.8510796723752796e-05,
"loss": 1.7203,
"step": 120
},
{
"epoch": 0.1,
"learning_rate": 4.8386696450732196e-05,
"loss": 1.6643,
"step": 130
},
{
"epoch": 0.1,
"learning_rate": 4.8262596177711595e-05,
"loss": 1.6655,
"step": 140
},
{
"epoch": 0.11,
"learning_rate": 4.8138495904690995e-05,
"loss": 1.681,
"step": 150
},
{
"epoch": 0.12,
"learning_rate": 4.8014395631670394e-05,
"loss": 1.7232,
"step": 160
},
{
"epoch": 0.13,
"learning_rate": 4.789029535864979e-05,
"loss": 1.6658,
"step": 170
},
{
"epoch": 0.13,
"learning_rate": 4.7766195085629186e-05,
"loss": 1.6965,
"step": 180
},
{
"epoch": 0.14,
"learning_rate": 4.764209481260859e-05,
"loss": 1.6745,
"step": 190
},
{
"epoch": 0.15,
"learning_rate": 4.751799453958799e-05,
"loss": 1.6881,
"step": 200
},
{
"epoch": 0.16,
"learning_rate": 4.739389426656739e-05,
"loss": 1.6899,
"step": 210
},
{
"epoch": 0.16,
"learning_rate": 4.726979399354679e-05,
"loss": 1.6825,
"step": 220
},
{
"epoch": 0.17,
"learning_rate": 4.7145693720526184e-05,
"loss": 1.6894,
"step": 230
},
{
"epoch": 0.18,
"learning_rate": 4.702159344750558e-05,
"loss": 1.7016,
"step": 240
},
{
"epoch": 0.19,
"learning_rate": 4.689749317448498e-05,
"loss": 1.6556,
"step": 250
},
{
"epoch": 0.19,
"learning_rate": 4.677339290146439e-05,
"loss": 1.715,
"step": 260
},
{
"epoch": 0.2,
"learning_rate": 4.664929262844379e-05,
"loss": 1.6415,
"step": 270
},
{
"epoch": 0.21,
"learning_rate": 4.652519235542319e-05,
"loss": 1.6986,
"step": 280
},
{
"epoch": 0.22,
"learning_rate": 4.640109208240258e-05,
"loss": 1.6766,
"step": 290
},
{
"epoch": 0.22,
"learning_rate": 4.627699180938198e-05,
"loss": 1.7016,
"step": 300
},
{
"epoch": 0.23,
"learning_rate": 4.615289153636138e-05,
"loss": 1.698,
"step": 310
},
{
"epoch": 0.24,
"learning_rate": 4.602879126334078e-05,
"loss": 1.6692,
"step": 320
},
{
"epoch": 0.25,
"learning_rate": 4.590469099032018e-05,
"loss": 1.6386,
"step": 330
},
{
"epoch": 0.25,
"learning_rate": 4.5780590717299585e-05,
"loss": 1.7369,
"step": 340
},
{
"epoch": 0.26,
"learning_rate": 4.5656490444278984e-05,
"loss": 1.6997,
"step": 350
},
{
"epoch": 0.27,
"learning_rate": 4.553239017125838e-05,
"loss": 1.6983,
"step": 360
},
{
"epoch": 0.28,
"learning_rate": 4.5408289898237776e-05,
"loss": 1.6702,
"step": 370
},
{
"epoch": 0.28,
"learning_rate": 4.5284189625217176e-05,
"loss": 1.6901,
"step": 380
},
{
"epoch": 0.29,
"learning_rate": 4.5160089352196575e-05,
"loss": 1.6916,
"step": 390
},
{
"epoch": 0.3,
"learning_rate": 4.5035989079175975e-05,
"loss": 1.682,
"step": 400
},
{
"epoch": 0.31,
"learning_rate": 4.491188880615538e-05,
"loss": 1.6756,
"step": 410
},
{
"epoch": 0.31,
"learning_rate": 4.4787788533134774e-05,
"loss": 1.6632,
"step": 420
},
{
"epoch": 0.32,
"learning_rate": 4.466368826011417e-05,
"loss": 1.67,
"step": 430
},
{
"epoch": 0.33,
"learning_rate": 4.453958798709357e-05,
"loss": 1.6581,
"step": 440
},
{
"epoch": 0.34,
"learning_rate": 4.441548771407297e-05,
"loss": 1.7106,
"step": 450
},
{
"epoch": 0.34,
"learning_rate": 4.429138744105237e-05,
"loss": 1.6757,
"step": 460
},
{
"epoch": 0.35,
"learning_rate": 4.416728716803177e-05,
"loss": 1.6315,
"step": 470
},
{
"epoch": 0.36,
"learning_rate": 4.404318689501117e-05,
"loss": 1.66,
"step": 480
},
{
"epoch": 0.36,
"learning_rate": 4.391908662199057e-05,
"loss": 1.6966,
"step": 490
},
{
"epoch": 0.37,
"learning_rate": 4.379498634896997e-05,
"loss": 1.6642,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 4029,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 3.1601241096192e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}