{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.013789141051422005,
"eval_steps": 500,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0003447285262855501,
"grad_norm": 2.125,
"learning_rate": 6e-05,
"loss": 1.9343,
"step": 3
},
{
"epoch": 0.0006894570525711002,
"grad_norm": 1.1015625,
"learning_rate": 0.00012,
"loss": 1.8061,
"step": 6
},
{
"epoch": 0.0010341855788566503,
"grad_norm": 0.9765625,
"learning_rate": 0.00018,
"loss": 1.6954,
"step": 9
},
{
"epoch": 0.0013789141051422005,
"grad_norm": 1.4296875,
"learning_rate": 0.0001999999738729554,
"loss": 1.7722,
"step": 12
},
{
"epoch": 0.0017236426314277506,
"grad_norm": 0.9453125,
"learning_rate": 0.00019999983670600854,
"loss": 1.8276,
"step": 15
},
{
"epoch": 0.0020683711577133006,
"grad_norm": 0.81640625,
"learning_rate": 0.00019999958196755926,
"loss": 1.8313,
"step": 18
},
{
"epoch": 0.0024130996839988508,
"grad_norm": 0.7734375,
"learning_rate": 0.00019999920965790713,
"loss": 1.8188,
"step": 21
},
{
"epoch": 0.002757828210284401,
"grad_norm": 1.0234375,
"learning_rate": 0.00019999871977748987,
"loss": 1.8129,
"step": 24
},
{
"epoch": 0.003102556736569951,
"grad_norm": 0.83984375,
"learning_rate": 0.00019999811232688342,
"loss": 1.8128,
"step": 27
},
{
"epoch": 0.0034472852628555013,
"grad_norm": 1.1875,
"learning_rate": 0.000199997387306802,
"loss": 1.8022,
"step": 30
},
{
"epoch": 0.0037920137891410514,
"grad_norm": 1.25,
"learning_rate": 0.000199996544718098,
"loss": 1.9719,
"step": 33
},
{
"epoch": 0.004136742315426601,
"grad_norm": 8.1875,
"learning_rate": 0.00019999558456176205,
"loss": 1.886,
"step": 36
},
{
"epoch": 0.004481470841712152,
"grad_norm": 0.91015625,
"learning_rate": 0.00019999450683892307,
"loss": 1.8663,
"step": 39
},
{
"epoch": 0.0048261993679977015,
"grad_norm": 0.85546875,
"learning_rate": 0.00019999331155084812,
"loss": 1.8967,
"step": 42
},
{
"epoch": 0.005170927894283252,
"grad_norm": 0.6953125,
"learning_rate": 0.00019999199869894256,
"loss": 1.814,
"step": 45
},
{
"epoch": 0.005515656420568802,
"grad_norm": 0.671875,
"learning_rate": 0.0001999905682847499,
"loss": 1.8673,
"step": 48
},
{
"epoch": 0.0058603849468543525,
"grad_norm": 0.75390625,
"learning_rate": 0.0001999890203099519,
"loss": 1.7688,
"step": 51
},
{
"epoch": 0.006205113473139902,
"grad_norm": 0.703125,
"learning_rate": 0.00019998735477636857,
"loss": 1.8559,
"step": 54
},
{
"epoch": 0.006549841999425453,
"grad_norm": 0.6484375,
"learning_rate": 0.00019998557168595803,
"loss": 1.7875,
"step": 57
},
{
"epoch": 0.0068945705257110025,
"grad_norm": 0.81640625,
"learning_rate": 0.0001999836710408168,
"loss": 1.8685,
"step": 60
},
{
"epoch": 0.007239299051996552,
"grad_norm": 8.125,
"learning_rate": 0.00019998165284317945,
"loss": 1.843,
"step": 63
},
{
"epoch": 0.007584027578282103,
"grad_norm": 6.34375,
"learning_rate": 0.0001999795170954188,
"loss": 1.8448,
"step": 66
},
{
"epoch": 0.007928756104567653,
"grad_norm": 15.4375,
"learning_rate": 0.00019997726380004585,
"loss": 1.876,
"step": 69
},
{
"epoch": 0.008273484630853202,
"grad_norm": 3.5,
"learning_rate": 0.00019997489295970993,
"loss": 1.8822,
"step": 72
},
{
"epoch": 0.008618213157138753,
"grad_norm": 1.625,
"learning_rate": 0.00019997240457719838,
"loss": 1.9351,
"step": 75
},
{
"epoch": 0.008962941683424304,
"grad_norm": 0.640625,
"learning_rate": 0.0001999697986554369,
"loss": 1.8457,
"step": 78
},
{
"epoch": 0.009307670209709854,
"grad_norm": 0.65625,
"learning_rate": 0.00019996707519748927,
"loss": 1.8246,
"step": 81
},
{
"epoch": 0.009652398735995403,
"grad_norm": 0.703125,
"learning_rate": 0.00019996423420655756,
"loss": 1.8404,
"step": 84
},
{
"epoch": 0.009997127262280954,
"grad_norm": 0.66015625,
"learning_rate": 0.00019996127568598193,
"loss": 1.8068,
"step": 87
},
{
"epoch": 0.010341855788566504,
"grad_norm": 0.796875,
"learning_rate": 0.0001999581996392408,
"loss": 1.7634,
"step": 90
},
{
"epoch": 0.010686584314852055,
"grad_norm": 0.62890625,
"learning_rate": 0.00019995500606995065,
"loss": 1.8114,
"step": 93
},
{
"epoch": 0.011031312841137604,
"grad_norm": 0.68359375,
"learning_rate": 0.00019995169498186632,
"loss": 1.7789,
"step": 96
},
{
"epoch": 0.011376041367423154,
"grad_norm": 0.7890625,
"learning_rate": 0.00019994826637888065,
"loss": 1.8287,
"step": 99
},
{
"epoch": 0.011720769893708705,
"grad_norm": 0.69921875,
"learning_rate": 0.00019994472026502467,
"loss": 1.8947,
"step": 102
},
{
"epoch": 0.012065498419994254,
"grad_norm": 1.828125,
"learning_rate": 0.0001999410566444677,
"loss": 1.8602,
"step": 105
},
{
"epoch": 0.012410226946279804,
"grad_norm": 0.6484375,
"learning_rate": 0.00019993727552151708,
"loss": 1.814,
"step": 108
},
{
"epoch": 0.012754955472565355,
"grad_norm": 0.62109375,
"learning_rate": 0.00019993337690061834,
"loss": 1.9383,
"step": 111
},
{
"epoch": 0.013099683998850906,
"grad_norm": 0.59765625,
"learning_rate": 0.00019992936078635509,
"loss": 1.8192,
"step": 114
},
{
"epoch": 0.013444412525136454,
"grad_norm": 0.59375,
"learning_rate": 0.00019992522718344927,
"loss": 1.8127,
"step": 117
},
{
"epoch": 0.013789141051422005,
"grad_norm": 0.65625,
"learning_rate": 0.00019992097609676073,
"loss": 1.8332,
"step": 120
}
],
"logging_steps": 3,
"max_steps": 8702,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.9279066133561344e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}