{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.013789141051422005,
"eval_steps": 500,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0003447285262855501,
"grad_norm": 2.125,
"learning_rate": 6e-05,
"loss": 1.9343,
"step": 3
},
{
"epoch": 0.0006894570525711002,
"grad_norm": 1.1015625,
"learning_rate": 0.00012,
"loss": 1.8061,
"step": 6
},
{
"epoch": 0.0010341855788566503,
"grad_norm": 0.9765625,
"learning_rate": 0.00018,
"loss": 1.6954,
"step": 9
},
{
"epoch": 0.0013789141051422005,
"grad_norm": 1.4296875,
"learning_rate": 0.0001999999738729554,
"loss": 1.7722,
"step": 12
},
{
"epoch": 0.0017236426314277506,
"grad_norm": 0.9453125,
"learning_rate": 0.00019999983670600854,
"loss": 1.8276,
"step": 15
},
{
"epoch": 0.0020683711577133006,
"grad_norm": 0.81640625,
"learning_rate": 0.00019999958196755926,
"loss": 1.8313,
"step": 18
},
{
"epoch": 0.0024130996839988508,
"grad_norm": 0.7734375,
"learning_rate": 0.00019999920965790713,
"loss": 1.8188,
"step": 21
},
{
"epoch": 0.002757828210284401,
"grad_norm": 1.0234375,
"learning_rate": 0.00019999871977748987,
"loss": 1.8129,
"step": 24
},
{
"epoch": 0.003102556736569951,
"grad_norm": 0.83984375,
"learning_rate": 0.00019999811232688342,
"loss": 1.8128,
"step": 27
},
{
"epoch": 0.0034472852628555013,
"grad_norm": 1.1875,
"learning_rate": 0.000199997387306802,
"loss": 1.8022,
"step": 30
},
{
"epoch": 0.0037920137891410514,
"grad_norm": 1.25,
"learning_rate": 0.000199996544718098,
"loss": 1.9719,
"step": 33
},
{
"epoch": 0.004136742315426601,
"grad_norm": 8.1875,
"learning_rate": 0.00019999558456176205,
"loss": 1.886,
"step": 36
},
{
"epoch": 0.004481470841712152,
"grad_norm": 0.91015625,
"learning_rate": 0.00019999450683892307,
"loss": 1.8663,
"step": 39
},
{
"epoch": 0.0048261993679977015,
"grad_norm": 0.85546875,
"learning_rate": 0.00019999331155084812,
"loss": 1.8967,
"step": 42
},
{
"epoch": 0.005170927894283252,
"grad_norm": 0.6953125,
"learning_rate": 0.00019999199869894256,
"loss": 1.814,
"step": 45
},
{
"epoch": 0.005515656420568802,
"grad_norm": 0.671875,
"learning_rate": 0.0001999905682847499,
"loss": 1.8673,
"step": 48
},
{
"epoch": 0.0058603849468543525,
"grad_norm": 0.75390625,
"learning_rate": 0.0001999890203099519,
"loss": 1.7688,
"step": 51
},
{
"epoch": 0.006205113473139902,
"grad_norm": 0.703125,
"learning_rate": 0.00019998735477636857,
"loss": 1.8559,
"step": 54
},
{
"epoch": 0.006549841999425453,
"grad_norm": 0.6484375,
"learning_rate": 0.00019998557168595803,
"loss": 1.7875,
"step": 57
},
{
"epoch": 0.0068945705257110025,
"grad_norm": 0.81640625,
"learning_rate": 0.0001999836710408168,
"loss": 1.8685,
"step": 60
},
{
"epoch": 0.007239299051996552,
"grad_norm": 8.125,
"learning_rate": 0.00019998165284317945,
"loss": 1.843,
"step": 63
},
{
"epoch": 0.007584027578282103,
"grad_norm": 6.34375,
"learning_rate": 0.0001999795170954188,
"loss": 1.8448,
"step": 66
},
{
"epoch": 0.007928756104567653,
"grad_norm": 15.4375,
"learning_rate": 0.00019997726380004585,
"loss": 1.876,
"step": 69
},
{
"epoch": 0.008273484630853202,
"grad_norm": 3.5,
"learning_rate": 0.00019997489295970993,
"loss": 1.8822,
"step": 72
},
{
"epoch": 0.008618213157138753,
"grad_norm": 1.625,
"learning_rate": 0.00019997240457719838,
"loss": 1.9351,
"step": 75
},
{
"epoch": 0.008962941683424304,
"grad_norm": 0.640625,
"learning_rate": 0.0001999697986554369,
"loss": 1.8457,
"step": 78
},
{
"epoch": 0.009307670209709854,
"grad_norm": 0.65625,
"learning_rate": 0.00019996707519748927,
"loss": 1.8246,
"step": 81
},
{
"epoch": 0.009652398735995403,
"grad_norm": 0.703125,
"learning_rate": 0.00019996423420655756,
"loss": 1.8404,
"step": 84
},
{
"epoch": 0.009997127262280954,
"grad_norm": 0.66015625,
"learning_rate": 0.00019996127568598193,
"loss": 1.8068,
"step": 87
},
{
"epoch": 0.010341855788566504,
"grad_norm": 0.796875,
"learning_rate": 0.0001999581996392408,
"loss": 1.7634,
"step": 90
},
{
"epoch": 0.010686584314852055,
"grad_norm": 0.62890625,
"learning_rate": 0.00019995500606995065,
"loss": 1.8114,
"step": 93
},
{
"epoch": 0.011031312841137604,
"grad_norm": 0.68359375,
"learning_rate": 0.00019995169498186632,
"loss": 1.7789,
"step": 96
},
{
"epoch": 0.011376041367423154,
"grad_norm": 0.7890625,
"learning_rate": 0.00019994826637888065,
"loss": 1.8287,
"step": 99
},
{
"epoch": 0.011720769893708705,
"grad_norm": 0.69921875,
"learning_rate": 0.00019994472026502467,
"loss": 1.8947,
"step": 102
},
{
"epoch": 0.012065498419994254,
"grad_norm": 1.828125,
"learning_rate": 0.0001999410566444677,
"loss": 1.8602,
"step": 105
},
{
"epoch": 0.012410226946279804,
"grad_norm": 0.6484375,
"learning_rate": 0.00019993727552151708,
"loss": 1.814,
"step": 108
},
{
"epoch": 0.012754955472565355,
"grad_norm": 0.62109375,
"learning_rate": 0.00019993337690061834,
"loss": 1.9383,
"step": 111
},
{
"epoch": 0.013099683998850906,
"grad_norm": 0.59765625,
"learning_rate": 0.00019992936078635509,
"loss": 1.8192,
"step": 114
},
{
"epoch": 0.013444412525136454,
"grad_norm": 0.59375,
"learning_rate": 0.00019992522718344927,
"loss": 1.8127,
"step": 117
},
{
"epoch": 0.013789141051422005,
"grad_norm": 0.65625,
"learning_rate": 0.00019992097609676073,
"loss": 1.8332,
"step": 120
}
],
"logging_steps": 3,
"max_steps": 8702,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.9279066133561344e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}