zoil-llama-13b-4epochs / trainer_state.json
jazza234234's picture
Upload 5 files
e517600
raw
history blame contribute delete
No virus
26.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.807017543859649,
"eval_steps": 50,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 2.478505000877347e-05,
"loss": 2.8559,
"step": 50
},
{
"epoch": 0.04,
"eval_loss": 2.195713520050049,
"eval_runtime": 14.6436,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 2.4565713283032112e-05,
"loss": 2.2991,
"step": 100
},
{
"epoch": 0.07,
"eval_loss": 1.9645323753356934,
"eval_runtime": 14.6522,
"eval_samples_per_second": 1.433,
"eval_steps_per_second": 0.205,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 2.4346376557290754e-05,
"loss": 2.1318,
"step": 150
},
{
"epoch": 0.11,
"eval_loss": 1.8834866285324097,
"eval_runtime": 14.646,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 150
},
{
"epoch": 0.14,
"learning_rate": 2.4127039831549393e-05,
"loss": 2.035,
"step": 200
},
{
"epoch": 0.14,
"eval_loss": 1.8372365236282349,
"eval_runtime": 14.6452,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 200
},
{
"epoch": 0.18,
"learning_rate": 2.390770310580804e-05,
"loss": 2.0511,
"step": 250
},
{
"epoch": 0.18,
"eval_loss": 1.8165128231048584,
"eval_runtime": 14.6408,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 250
},
{
"epoch": 0.21,
"learning_rate": 2.3688366380066677e-05,
"loss": 2.0054,
"step": 300
},
{
"epoch": 0.21,
"eval_loss": 1.7788053750991821,
"eval_runtime": 14.6416,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 300
},
{
"epoch": 0.25,
"learning_rate": 2.3469029654325323e-05,
"loss": 1.944,
"step": 350
},
{
"epoch": 0.25,
"eval_loss": 1.7566713094711304,
"eval_runtime": 14.6486,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 350
},
{
"epoch": 0.28,
"learning_rate": 2.324969292858396e-05,
"loss": 1.9487,
"step": 400
},
{
"epoch": 0.28,
"eval_loss": 1.7560102939605713,
"eval_runtime": 14.6501,
"eval_samples_per_second": 1.433,
"eval_steps_per_second": 0.205,
"step": 400
},
{
"epoch": 0.32,
"learning_rate": 2.3030356202842604e-05,
"loss": 1.9183,
"step": 450
},
{
"epoch": 0.32,
"eval_loss": 1.7329343557357788,
"eval_runtime": 14.6419,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 450
},
{
"epoch": 0.35,
"learning_rate": 2.2811019477101246e-05,
"loss": 1.8684,
"step": 500
},
{
"epoch": 0.35,
"eval_loss": 1.7263619899749756,
"eval_runtime": 14.647,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 500
},
{
"epoch": 0.39,
"learning_rate": 2.2591682751359888e-05,
"loss": 1.9194,
"step": 550
},
{
"epoch": 0.39,
"eval_loss": 1.7277635335922241,
"eval_runtime": 14.6488,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 550
},
{
"epoch": 0.42,
"learning_rate": 2.237234602561853e-05,
"loss": 1.8842,
"step": 600
},
{
"epoch": 0.42,
"eval_loss": 1.728346347808838,
"eval_runtime": 14.6409,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 600
},
{
"epoch": 0.46,
"learning_rate": 2.2153009299877172e-05,
"loss": 1.817,
"step": 650
},
{
"epoch": 0.46,
"eval_loss": 1.709412693977356,
"eval_runtime": 14.6427,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 650
},
{
"epoch": 0.49,
"learning_rate": 2.1933672574135815e-05,
"loss": 1.8139,
"step": 700
},
{
"epoch": 0.49,
"eval_loss": 1.7006276845932007,
"eval_runtime": 14.6444,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 700
},
{
"epoch": 0.53,
"learning_rate": 2.1714335848394457e-05,
"loss": 1.8657,
"step": 750
},
{
"epoch": 0.53,
"eval_loss": 1.688984990119934,
"eval_runtime": 14.6453,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 750
},
{
"epoch": 0.56,
"learning_rate": 2.14949991226531e-05,
"loss": 1.8366,
"step": 800
},
{
"epoch": 0.56,
"eval_loss": 1.6756386756896973,
"eval_runtime": 14.6493,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 800
},
{
"epoch": 0.6,
"learning_rate": 2.1275662396911738e-05,
"loss": 1.8521,
"step": 850
},
{
"epoch": 0.6,
"eval_loss": 1.6723501682281494,
"eval_runtime": 14.645,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 850
},
{
"epoch": 0.63,
"learning_rate": 2.1056325671170383e-05,
"loss": 1.8357,
"step": 900
},
{
"epoch": 0.63,
"eval_loss": 1.6671411991119385,
"eval_runtime": 14.646,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 900
},
{
"epoch": 0.67,
"learning_rate": 2.0836988945429022e-05,
"loss": 1.8667,
"step": 950
},
{
"epoch": 0.67,
"eval_loss": 1.6564487218856812,
"eval_runtime": 14.6438,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 950
},
{
"epoch": 0.7,
"learning_rate": 2.0617652219687667e-05,
"loss": 1.8163,
"step": 1000
},
{
"epoch": 0.7,
"eval_loss": 1.6533170938491821,
"eval_runtime": 14.6431,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 1000
},
{
"epoch": 0.74,
"learning_rate": 2.0398315493946306e-05,
"loss": 1.844,
"step": 1050
},
{
"epoch": 0.74,
"eval_loss": 1.6446335315704346,
"eval_runtime": 14.6477,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 1050
},
{
"epoch": 0.77,
"learning_rate": 2.017897876820495e-05,
"loss": 1.8403,
"step": 1100
},
{
"epoch": 0.77,
"eval_loss": 1.6377147436141968,
"eval_runtime": 14.65,
"eval_samples_per_second": 1.433,
"eval_steps_per_second": 0.205,
"step": 1100
},
{
"epoch": 0.81,
"learning_rate": 1.995964204246359e-05,
"loss": 1.8436,
"step": 1150
},
{
"epoch": 0.81,
"eval_loss": 1.6400964260101318,
"eval_runtime": 14.6512,
"eval_samples_per_second": 1.433,
"eval_steps_per_second": 0.205,
"step": 1150
},
{
"epoch": 0.84,
"learning_rate": 1.9740305316722233e-05,
"loss": 1.8255,
"step": 1200
},
{
"epoch": 0.84,
"eval_loss": 1.6376707553863525,
"eval_runtime": 14.6441,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 1200
},
{
"epoch": 0.88,
"learning_rate": 1.9520968590980875e-05,
"loss": 1.7917,
"step": 1250
},
{
"epoch": 0.88,
"eval_loss": 1.6302813291549683,
"eval_runtime": 14.6397,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 1250
},
{
"epoch": 0.91,
"learning_rate": 1.9301631865239517e-05,
"loss": 1.8289,
"step": 1300
},
{
"epoch": 0.91,
"eval_loss": 1.6255093812942505,
"eval_runtime": 14.6406,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 1300
},
{
"epoch": 0.95,
"learning_rate": 1.9082295139498156e-05,
"loss": 1.8736,
"step": 1350
},
{
"epoch": 0.95,
"eval_loss": 1.617380976676941,
"eval_runtime": 14.6455,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 1350
},
{
"epoch": 0.98,
"learning_rate": 1.88629584137568e-05,
"loss": 1.748,
"step": 1400
},
{
"epoch": 0.98,
"eval_loss": 1.6211485862731934,
"eval_runtime": 14.6529,
"eval_samples_per_second": 1.433,
"eval_steps_per_second": 0.205,
"step": 1400
},
{
"epoch": 1.02,
"learning_rate": 1.8643621688015444e-05,
"loss": 1.7769,
"step": 1450
},
{
"epoch": 1.02,
"eval_loss": 1.6201837062835693,
"eval_runtime": 14.6729,
"eval_samples_per_second": 1.431,
"eval_steps_per_second": 0.204,
"step": 1450
},
{
"epoch": 1.05,
"learning_rate": 1.8424284962274086e-05,
"loss": 1.7295,
"step": 1500
},
{
"epoch": 1.05,
"eval_loss": 1.6232948303222656,
"eval_runtime": 14.6843,
"eval_samples_per_second": 1.43,
"eval_steps_per_second": 0.204,
"step": 1500
},
{
"epoch": 1.09,
"learning_rate": 1.8204948236532728e-05,
"loss": 1.6845,
"step": 1550
},
{
"epoch": 1.09,
"eval_loss": 1.626202940940857,
"eval_runtime": 14.6738,
"eval_samples_per_second": 1.431,
"eval_steps_per_second": 0.204,
"step": 1550
},
{
"epoch": 1.12,
"learning_rate": 1.7985611510791367e-05,
"loss": 1.6655,
"step": 1600
},
{
"epoch": 1.12,
"eval_loss": 1.617616057395935,
"eval_runtime": 14.6736,
"eval_samples_per_second": 1.431,
"eval_steps_per_second": 0.204,
"step": 1600
},
{
"epoch": 1.16,
"learning_rate": 1.7766274785050012e-05,
"loss": 1.6687,
"step": 1650
},
{
"epoch": 1.16,
"eval_loss": 1.6166654825210571,
"eval_runtime": 14.7006,
"eval_samples_per_second": 1.429,
"eval_steps_per_second": 0.204,
"step": 1650
},
{
"epoch": 1.19,
"learning_rate": 1.754693805930865e-05,
"loss": 1.704,
"step": 1700
},
{
"epoch": 1.19,
"eval_loss": 1.6115292310714722,
"eval_runtime": 14.6762,
"eval_samples_per_second": 1.431,
"eval_steps_per_second": 0.204,
"step": 1700
},
{
"epoch": 1.23,
"learning_rate": 1.7327601333567293e-05,
"loss": 1.6818,
"step": 1750
},
{
"epoch": 1.23,
"eval_loss": 1.6096872091293335,
"eval_runtime": 14.6731,
"eval_samples_per_second": 1.431,
"eval_steps_per_second": 0.204,
"step": 1750
},
{
"epoch": 1.26,
"learning_rate": 1.7108264607825935e-05,
"loss": 1.675,
"step": 1800
},
{
"epoch": 1.26,
"eval_loss": 1.6069471836090088,
"eval_runtime": 14.6844,
"eval_samples_per_second": 1.43,
"eval_steps_per_second": 0.204,
"step": 1800
},
{
"epoch": 1.3,
"learning_rate": 1.6888927882084577e-05,
"loss": 1.6865,
"step": 1850
},
{
"epoch": 1.3,
"eval_loss": 1.6076393127441406,
"eval_runtime": 14.648,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 1850
},
{
"epoch": 1.33,
"learning_rate": 1.666959115634322e-05,
"loss": 1.6966,
"step": 1900
},
{
"epoch": 1.33,
"eval_loss": 1.603057861328125,
"eval_runtime": 14.6382,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 1900
},
{
"epoch": 1.37,
"learning_rate": 1.6450254430601862e-05,
"loss": 1.7239,
"step": 1950
},
{
"epoch": 1.37,
"eval_loss": 1.6086949110031128,
"eval_runtime": 14.6442,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 1950
},
{
"epoch": 1.4,
"learning_rate": 1.62309177048605e-05,
"loss": 1.677,
"step": 2000
},
{
"epoch": 1.4,
"eval_loss": 1.6053651571273804,
"eval_runtime": 14.6414,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2000
},
{
"epoch": 1.44,
"learning_rate": 1.6011580979119146e-05,
"loss": 1.6925,
"step": 2050
},
{
"epoch": 1.44,
"eval_loss": 1.5961679220199585,
"eval_runtime": 14.6373,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2050
},
{
"epoch": 1.47,
"learning_rate": 1.5792244253377785e-05,
"loss": 1.7188,
"step": 2100
},
{
"epoch": 1.47,
"eval_loss": 1.591933250427246,
"eval_runtime": 14.6374,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2100
},
{
"epoch": 1.51,
"learning_rate": 1.557290752763643e-05,
"loss": 1.6994,
"step": 2150
},
{
"epoch": 1.51,
"eval_loss": 1.5993770360946655,
"eval_runtime": 14.6433,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2150
},
{
"epoch": 1.54,
"learning_rate": 1.535357080189507e-05,
"loss": 1.6817,
"step": 2200
},
{
"epoch": 1.54,
"eval_loss": 1.5951728820800781,
"eval_runtime": 14.6423,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2200
},
{
"epoch": 1.58,
"learning_rate": 1.5134234076153711e-05,
"loss": 1.6576,
"step": 2250
},
{
"epoch": 1.58,
"eval_loss": 1.595167636871338,
"eval_runtime": 14.6398,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2250
},
{
"epoch": 1.61,
"learning_rate": 1.4914897350412355e-05,
"loss": 1.7117,
"step": 2300
},
{
"epoch": 1.61,
"eval_loss": 1.594815969467163,
"eval_runtime": 14.633,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2300
},
{
"epoch": 1.65,
"learning_rate": 1.4695560624670996e-05,
"loss": 1.6795,
"step": 2350
},
{
"epoch": 1.65,
"eval_loss": 1.5858888626098633,
"eval_runtime": 14.6449,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2350
},
{
"epoch": 1.68,
"learning_rate": 1.4476223898929636e-05,
"loss": 1.7084,
"step": 2400
},
{
"epoch": 1.68,
"eval_loss": 1.584323525428772,
"eval_runtime": 14.6394,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2400
},
{
"epoch": 1.72,
"learning_rate": 1.425688717318828e-05,
"loss": 1.6785,
"step": 2450
},
{
"epoch": 1.72,
"eval_loss": 1.5876024961471558,
"eval_runtime": 14.6397,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2450
},
{
"epoch": 1.75,
"learning_rate": 1.403755044744692e-05,
"loss": 1.6863,
"step": 2500
},
{
"epoch": 1.75,
"eval_loss": 1.5827977657318115,
"eval_runtime": 14.6332,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2500
},
{
"epoch": 1.79,
"learning_rate": 1.3818213721705564e-05,
"loss": 1.718,
"step": 2550
},
{
"epoch": 1.79,
"eval_loss": 1.5843836069107056,
"eval_runtime": 14.6334,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2550
},
{
"epoch": 1.82,
"learning_rate": 1.3598876995964205e-05,
"loss": 1.6811,
"step": 2600
},
{
"epoch": 1.82,
"eval_loss": 1.5835539102554321,
"eval_runtime": 14.6371,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2600
},
{
"epoch": 1.86,
"learning_rate": 1.3379540270222845e-05,
"loss": 1.6748,
"step": 2650
},
{
"epoch": 1.86,
"eval_loss": 1.5856655836105347,
"eval_runtime": 14.6358,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2650
},
{
"epoch": 1.89,
"learning_rate": 1.3160203544481489e-05,
"loss": 1.6614,
"step": 2700
},
{
"epoch": 1.89,
"eval_loss": 1.5807286500930786,
"eval_runtime": 14.6393,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2700
},
{
"epoch": 1.93,
"learning_rate": 1.294086681874013e-05,
"loss": 1.6587,
"step": 2750
},
{
"epoch": 1.93,
"eval_loss": 1.5819549560546875,
"eval_runtime": 14.6411,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2750
},
{
"epoch": 1.96,
"learning_rate": 1.2721530092998773e-05,
"loss": 1.7167,
"step": 2800
},
{
"epoch": 1.96,
"eval_loss": 1.5828579664230347,
"eval_runtime": 14.6359,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2800
},
{
"epoch": 2.0,
"learning_rate": 1.2502193367257414e-05,
"loss": 1.663,
"step": 2850
},
{
"epoch": 2.0,
"eval_loss": 1.5743989944458008,
"eval_runtime": 14.6401,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 2850
},
{
"epoch": 2.04,
"learning_rate": 1.2282856641516056e-05,
"loss": 1.5641,
"step": 2900
},
{
"epoch": 2.04,
"eval_loss": 1.5980640649795532,
"eval_runtime": 14.6296,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2900
},
{
"epoch": 2.07,
"learning_rate": 1.2063519915774696e-05,
"loss": 1.6048,
"step": 2950
},
{
"epoch": 2.07,
"eval_loss": 1.598111629486084,
"eval_runtime": 14.6392,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 2950
},
{
"epoch": 2.11,
"learning_rate": 1.1844183190033339e-05,
"loss": 1.542,
"step": 3000
},
{
"epoch": 2.11,
"eval_loss": 1.5949300527572632,
"eval_runtime": 14.6318,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3000
},
{
"epoch": 2.14,
"learning_rate": 1.162484646429198e-05,
"loss": 1.5717,
"step": 3050
},
{
"epoch": 2.14,
"eval_loss": 1.5896787643432617,
"eval_runtime": 14.6355,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3050
},
{
"epoch": 2.18,
"learning_rate": 1.1405509738550623e-05,
"loss": 1.5775,
"step": 3100
},
{
"epoch": 2.18,
"eval_loss": 1.5945594310760498,
"eval_runtime": 14.6318,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3100
},
{
"epoch": 2.21,
"learning_rate": 1.1186173012809265e-05,
"loss": 1.5514,
"step": 3150
},
{
"epoch": 2.21,
"eval_loss": 1.596181035041809,
"eval_runtime": 14.6312,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3150
},
{
"epoch": 2.25,
"learning_rate": 1.0966836287067907e-05,
"loss": 1.5479,
"step": 3200
},
{
"epoch": 2.25,
"eval_loss": 1.5988695621490479,
"eval_runtime": 14.6305,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3200
},
{
"epoch": 2.28,
"learning_rate": 1.074749956132655e-05,
"loss": 1.5567,
"step": 3250
},
{
"epoch": 2.28,
"eval_loss": 1.5939878225326538,
"eval_runtime": 14.6282,
"eval_samples_per_second": 1.436,
"eval_steps_per_second": 0.205,
"step": 3250
},
{
"epoch": 2.32,
"learning_rate": 1.0528162835585192e-05,
"loss": 1.5917,
"step": 3300
},
{
"epoch": 2.32,
"eval_loss": 1.5928601026535034,
"eval_runtime": 14.6304,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3300
},
{
"epoch": 2.35,
"learning_rate": 1.0308826109843834e-05,
"loss": 1.5635,
"step": 3350
},
{
"epoch": 2.35,
"eval_loss": 1.5916301012039185,
"eval_runtime": 14.6301,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3350
},
{
"epoch": 2.39,
"learning_rate": 1.0089489384102474e-05,
"loss": 1.6097,
"step": 3400
},
{
"epoch": 2.39,
"eval_loss": 1.5879075527191162,
"eval_runtime": 14.6343,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3400
},
{
"epoch": 2.42,
"learning_rate": 9.870152658361116e-06,
"loss": 1.5256,
"step": 3450
},
{
"epoch": 2.42,
"eval_loss": 1.5953552722930908,
"eval_runtime": 14.6382,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3450
},
{
"epoch": 2.46,
"learning_rate": 9.650815932619759e-06,
"loss": 1.5758,
"step": 3500
},
{
"epoch": 2.46,
"eval_loss": 1.5901867151260376,
"eval_runtime": 14.6575,
"eval_samples_per_second": 1.433,
"eval_steps_per_second": 0.205,
"step": 3500
},
{
"epoch": 2.49,
"learning_rate": 9.4314792068784e-06,
"loss": 1.5924,
"step": 3550
},
{
"epoch": 2.49,
"eval_loss": 1.592368721961975,
"eval_runtime": 14.6369,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3550
},
{
"epoch": 2.53,
"learning_rate": 9.212142481137043e-06,
"loss": 1.5442,
"step": 3600
},
{
"epoch": 2.53,
"eval_loss": 1.5874643325805664,
"eval_runtime": 14.6605,
"eval_samples_per_second": 1.432,
"eval_steps_per_second": 0.205,
"step": 3600
},
{
"epoch": 2.56,
"learning_rate": 8.992805755395683e-06,
"loss": 1.5633,
"step": 3650
},
{
"epoch": 2.56,
"eval_loss": 1.5897241830825806,
"eval_runtime": 14.6465,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 3650
},
{
"epoch": 2.6,
"learning_rate": 8.773469029654325e-06,
"loss": 1.5578,
"step": 3700
},
{
"epoch": 2.6,
"eval_loss": 1.5877512693405151,
"eval_runtime": 14.6433,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 3700
},
{
"epoch": 2.63,
"learning_rate": 8.554132303912968e-06,
"loss": 1.5486,
"step": 3750
},
{
"epoch": 2.63,
"eval_loss": 1.5919996500015259,
"eval_runtime": 14.6424,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 3750
},
{
"epoch": 2.67,
"learning_rate": 8.33479557817161e-06,
"loss": 1.559,
"step": 3800
},
{
"epoch": 2.67,
"eval_loss": 1.5883424282073975,
"eval_runtime": 14.6464,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 3800
},
{
"epoch": 2.7,
"learning_rate": 8.11545885243025e-06,
"loss": 1.5766,
"step": 3850
},
{
"epoch": 2.7,
"eval_loss": 1.5866857767105103,
"eval_runtime": 14.6428,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 3850
},
{
"epoch": 2.74,
"learning_rate": 7.896122126688892e-06,
"loss": 1.5489,
"step": 3900
},
{
"epoch": 2.74,
"eval_loss": 1.5900993347167969,
"eval_runtime": 14.6566,
"eval_samples_per_second": 1.433,
"eval_steps_per_second": 0.205,
"step": 3900
},
{
"epoch": 2.77,
"learning_rate": 7.676785400947535e-06,
"loss": 1.5606,
"step": 3950
},
{
"epoch": 2.77,
"eval_loss": 1.5870875120162964,
"eval_runtime": 14.632,
"eval_samples_per_second": 1.435,
"eval_steps_per_second": 0.205,
"step": 3950
},
{
"epoch": 2.81,
"learning_rate": 7.457448675206178e-06,
"loss": 1.5128,
"step": 4000
},
{
"epoch": 2.81,
"eval_loss": 1.586663007736206,
"eval_runtime": 14.6412,
"eval_samples_per_second": 1.434,
"eval_steps_per_second": 0.205,
"step": 4000
}
],
"logging_steps": 50,
"max_steps": 5700,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 400,
"total_flos": 7.98304400105472e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}