|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9957446808510637,
  "eval_steps": 500,
  "global_step": 264,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11347517730496454,
      "grad_norm": 2.358112635127941,
      "learning_rate": 5e-06,
      "loss": 1.045,
      "step": 10
    },
    {
      "epoch": 0.22695035460992907,
      "grad_norm": 2.2025871190711044,
      "learning_rate": 5e-06,
      "loss": 0.939,
      "step": 20
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 1.183824809408691,
      "learning_rate": 5e-06,
      "loss": 0.8991,
      "step": 30
    },
    {
      "epoch": 0.45390070921985815,
      "grad_norm": 1.2551021476147783,
      "learning_rate": 5e-06,
      "loss": 0.8706,
      "step": 40
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 1.6978646589888085,
      "learning_rate": 5e-06,
      "loss": 0.8565,
      "step": 50
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 2.13778699873673,
      "learning_rate": 5e-06,
      "loss": 0.8386,
      "step": 60
    },
    {
      "epoch": 0.7943262411347518,
      "grad_norm": 0.993295806091264,
      "learning_rate": 5e-06,
      "loss": 0.8309,
      "step": 70
    },
    {
      "epoch": 0.9078014184397163,
      "grad_norm": 1.1398762974593635,
      "learning_rate": 5e-06,
      "loss": 0.823,
      "step": 80
    },
    {
      "epoch": 0.9985815602836879,
      "eval_loss": 0.8090236783027649,
      "eval_runtime": 63.3465,
      "eval_samples_per_second": 37.445,
      "eval_steps_per_second": 0.6,
      "step": 88
    },
    {
      "epoch": 1.0212765957446808,
      "grad_norm": 1.013500095467207,
      "learning_rate": 5e-06,
      "loss": 0.8742,
      "step": 90
    },
    {
      "epoch": 1.1347517730496455,
      "grad_norm": 1.056225323349834,
      "learning_rate": 5e-06,
      "loss": 0.7667,
      "step": 100
    },
    {
      "epoch": 1.24822695035461,
      "grad_norm": 0.7290196034792423,
      "learning_rate": 5e-06,
      "loss": 0.755,
      "step": 110
    },
    {
      "epoch": 1.3617021276595744,
      "grad_norm": 0.8838498260846974,
      "learning_rate": 5e-06,
      "loss": 0.7554,
      "step": 120
    },
    {
      "epoch": 1.475177304964539,
      "grad_norm": 0.821991213787815,
      "learning_rate": 5e-06,
      "loss": 0.7556,
      "step": 130
    },
    {
      "epoch": 1.5886524822695036,
      "grad_norm": 0.9855152726966359,
      "learning_rate": 5e-06,
      "loss": 0.7493,
      "step": 140
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 0.6490086567527167,
      "learning_rate": 5e-06,
      "loss": 0.7477,
      "step": 150
    },
    {
      "epoch": 1.8156028368794326,
      "grad_norm": 1.0694149262660388,
      "learning_rate": 5e-06,
      "loss": 0.7414,
      "step": 160
    },
    {
      "epoch": 1.9290780141843973,
      "grad_norm": 0.9645011140855406,
      "learning_rate": 5e-06,
      "loss": 0.7481,
      "step": 170
    },
    {
      "epoch": 1.9971631205673759,
      "eval_loss": 0.7898643016815186,
      "eval_runtime": 62.2492,
      "eval_samples_per_second": 38.105,
      "eval_steps_per_second": 0.61,
      "step": 176
    },
    {
      "epoch": 2.0425531914893615,
      "grad_norm": 1.581859190270789,
      "learning_rate": 5e-06,
      "loss": 0.7818,
      "step": 180
    },
    {
      "epoch": 2.1560283687943262,
      "grad_norm": 1.0466470957786433,
      "learning_rate": 5e-06,
      "loss": 0.6863,
      "step": 190
    },
    {
      "epoch": 2.269503546099291,
      "grad_norm": 0.9663026123669691,
      "learning_rate": 5e-06,
      "loss": 0.6798,
      "step": 200
    },
    {
      "epoch": 2.382978723404255,
      "grad_norm": 0.8243226264574698,
      "learning_rate": 5e-06,
      "loss": 0.6826,
      "step": 210
    },
    {
      "epoch": 2.49645390070922,
      "grad_norm": 1.0907354136557872,
      "learning_rate": 5e-06,
      "loss": 0.6839,
      "step": 220
    },
    {
      "epoch": 2.6099290780141846,
      "grad_norm": 0.7996806357479502,
      "learning_rate": 5e-06,
      "loss": 0.687,
      "step": 230
    },
    {
      "epoch": 2.723404255319149,
      "grad_norm": 0.9108831837931511,
      "learning_rate": 5e-06,
      "loss": 0.6902,
      "step": 240
    },
    {
      "epoch": 2.8368794326241136,
      "grad_norm": 0.8473372600949097,
      "learning_rate": 5e-06,
      "loss": 0.6873,
      "step": 250
    },
    {
      "epoch": 2.950354609929078,
      "grad_norm": 0.8244777156304377,
      "learning_rate": 5e-06,
      "loss": 0.6866,
      "step": 260
    },
    {
      "epoch": 2.9957446808510637,
      "eval_loss": 0.7845782041549683,
      "eval_runtime": 58.8988,
      "eval_samples_per_second": 40.272,
      "eval_steps_per_second": 0.645,
      "step": 264
    },
    {
      "epoch": 2.9957446808510637,
      "step": 264,
      "total_flos": 442000453140480.0,
      "train_loss": 0.7778021304896383,
      "train_runtime": 8882.1716,
      "train_samples_per_second": 15.22,
      "train_steps_per_second": 0.03
    }
  ],
  "logging_steps": 10,
  "max_steps": 264,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 442000453140480.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|