entity_extractor_v1 / checkpoint-1204 /trainer_state.json

Upload folder using huggingface_hub

f89ab20 verified 5 months ago

10.2 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 10.995433789954339,
	"eval_steps": 500,
	"global_step": 1204,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.182648401826484,
	"grad_norm": 0.4084379971027374,
	"learning_rate": 0.0001,
	"loss": 0.8312,
	"step": 20
	},
	{
	"epoch": 0.365296803652968,
	"grad_norm": 0.24725936353206635,
	"learning_rate": 0.0001,
	"loss": 0.1547,
	"step": 40
	},
	{
	"epoch": 0.547945205479452,
	"grad_norm": 0.1690889149904251,
	"learning_rate": 0.0001,
	"loss": 0.0644,
	"step": 60
	},
	{
	"epoch": 0.730593607305936,
	"grad_norm": 0.09192364662885666,
	"learning_rate": 0.0001,
	"loss": 0.0466,
	"step": 80
	},
	{
	"epoch": 0.91324200913242,
	"grad_norm": 0.08266641944646835,
	"learning_rate": 0.0001,
	"loss": 0.0385,
	"step": 100
	},
	{
	"epoch": 1.095890410958904,
	"grad_norm": 0.10168185085058212,
	"learning_rate": 0.0001,
	"loss": 0.0379,
	"step": 120
	},
	{
	"epoch": 1.278538812785388,
	"grad_norm": 0.10715723037719727,
	"learning_rate": 0.0001,
	"loss": 0.0337,
	"step": 140
	},
	{
	"epoch": 1.461187214611872,
	"grad_norm": 0.08185174316167831,
	"learning_rate": 0.0001,
	"loss": 0.0304,
	"step": 160
	},
	{
	"epoch": 1.643835616438356,
	"grad_norm": 0.0720980241894722,
	"learning_rate": 0.0001,
	"loss": 0.0342,
	"step": 180
	},
	{
	"epoch": 1.82648401826484,
	"grad_norm": 0.07974616438150406,
	"learning_rate": 0.0001,
	"loss": 0.0312,
	"step": 200
	},
	{
	"epoch": 2.009132420091324,
	"grad_norm": 0.08611268550157547,
	"learning_rate": 0.0001,
	"loss": 0.0315,
	"step": 220
	},
	{
	"epoch": 2.191780821917808,
	"grad_norm": 0.06699004024267197,
	"learning_rate": 0.0001,
	"loss": 0.0267,
	"step": 240
	},
	{
	"epoch": 2.374429223744292,
	"grad_norm": 0.1077587902545929,
	"learning_rate": 0.0001,
	"loss": 0.0246,
	"step": 260
	},
	{
	"epoch": 2.557077625570776,
	"grad_norm": 0.10352851450443268,
	"learning_rate": 0.0001,
	"loss": 0.0267,
	"step": 280
	},
	{
	"epoch": 2.73972602739726,
	"grad_norm": 0.08488716930150986,
	"learning_rate": 0.0001,
	"loss": 0.0297,
	"step": 300
	},
	{
	"epoch": 2.922374429223744,
	"grad_norm": 0.08407847583293915,
	"learning_rate": 0.0001,
	"loss": 0.0269,
	"step": 320
	},
	{
	"epoch": 3.105022831050228,
	"grad_norm": 0.0976366400718689,
	"learning_rate": 0.0001,
	"loss": 0.0251,
	"step": 340
	},
	{
	"epoch": 3.287671232876712,
	"grad_norm": 0.08240761607885361,
	"learning_rate": 0.0001,
	"loss": 0.0229,
	"step": 360
	},
	{
	"epoch": 3.470319634703196,
	"grad_norm": 0.0689239650964737,
	"learning_rate": 0.0001,
	"loss": 0.0232,
	"step": 380
	},
	{
	"epoch": 3.65296803652968,
	"grad_norm": 0.0607539638876915,
	"learning_rate": 0.0001,
	"loss": 0.0231,
	"step": 400
	},
	{
	"epoch": 3.8356164383561646,
	"grad_norm": 0.06858925521373749,
	"learning_rate": 0.0001,
	"loss": 0.023,
	"step": 420
	},
	{
	"epoch": 4.018264840182648,
	"grad_norm": 0.04049643874168396,
	"learning_rate": 0.0001,
	"loss": 0.0231,
	"step": 440
	},
	{
	"epoch": 4.200913242009133,
	"grad_norm": 0.08556920289993286,
	"learning_rate": 0.0001,
	"loss": 0.018,
	"step": 460
	},
	{
	"epoch": 4.383561643835616,
	"grad_norm": 0.05961354076862335,
	"learning_rate": 0.0001,
	"loss": 0.0183,
	"step": 480
	},
	{
	"epoch": 4.566210045662101,
	"grad_norm": 0.05691586434841156,
	"learning_rate": 0.0001,
	"loss": 0.02,
	"step": 500
	},
	{
	"epoch": 4.748858447488584,
	"grad_norm": 0.05423538759350777,
	"learning_rate": 0.0001,
	"loss": 0.0196,
	"step": 520
	},
	{
	"epoch": 4.931506849315069,
	"grad_norm": 0.10058747231960297,
	"learning_rate": 0.0001,
	"loss": 0.0206,
	"step": 540
	},
	{
	"epoch": 5.114155251141552,
	"grad_norm": 0.064676932990551,
	"learning_rate": 0.0001,
	"loss": 0.0177,
	"step": 560
	},
	{
	"epoch": 5.296803652968037,
	"grad_norm": 0.08128379285335541,
	"learning_rate": 0.0001,
	"loss": 0.0157,
	"step": 580
	},
	{
	"epoch": 5.47945205479452,
	"grad_norm": 0.10474538058042526,
	"learning_rate": 0.0001,
	"loss": 0.0169,
	"step": 600
	},
	{
	"epoch": 5.662100456621005,
	"grad_norm": 0.09420209378004074,
	"learning_rate": 0.0001,
	"loss": 0.0207,
	"step": 620
	},
	{
	"epoch": 5.844748858447488,
	"grad_norm": 0.07704417407512665,
	"learning_rate": 0.0001,
	"loss": 0.018,
	"step": 640
	},
	{
	"epoch": 6.027397260273973,
	"grad_norm": 0.044411078095436096,
	"learning_rate": 0.0001,
	"loss": 0.0168,
	"step": 660
	},
	{
	"epoch": 6.210045662100456,
	"grad_norm": 0.09763959795236588,
	"learning_rate": 0.0001,
	"loss": 0.0131,
	"step": 680
	},
	{
	"epoch": 6.392694063926941,
	"grad_norm": 0.08706251531839371,
	"learning_rate": 0.0001,
	"loss": 0.0146,
	"step": 700
	},
	{
	"epoch": 6.575342465753424,
	"grad_norm": 0.10404196381568909,
	"learning_rate": 0.0001,
	"loss": 0.0169,
	"step": 720
	},
	{
	"epoch": 6.757990867579909,
	"grad_norm": 0.1037658154964447,
	"learning_rate": 0.0001,
	"loss": 0.0165,
	"step": 740
	},
	{
	"epoch": 6.940639269406392,
	"grad_norm": 0.07572110742330551,
	"learning_rate": 0.0001,
	"loss": 0.0168,
	"step": 760
	},
	{
	"epoch": 7.123287671232877,
	"grad_norm": 0.06740553677082062,
	"learning_rate": 0.0001,
	"loss": 0.0139,
	"step": 780
	},
	{
	"epoch": 7.30593607305936,
	"grad_norm": 0.08043979108333588,
	"learning_rate": 0.0001,
	"loss": 0.014,
	"step": 800
	},
	{
	"epoch": 7.488584474885845,
	"grad_norm": 0.06607798486948013,
	"learning_rate": 0.0001,
	"loss": 0.0136,
	"step": 820
	},
	{
	"epoch": 7.671232876712329,
	"grad_norm": 0.11705009639263153,
	"learning_rate": 0.0001,
	"loss": 0.0146,
	"step": 840
	},
	{
	"epoch": 7.853881278538813,
	"grad_norm": 0.04560132324695587,
	"learning_rate": 0.0001,
	"loss": 0.0154,
	"step": 860
	},
	{
	"epoch": 8.036529680365296,
	"grad_norm": 0.05037812143564224,
	"learning_rate": 0.0001,
	"loss": 0.0129,
	"step": 880
	},
	{
	"epoch": 8.219178082191782,
	"grad_norm": 0.07135117053985596,
	"learning_rate": 0.0001,
	"loss": 0.0109,
	"step": 900
	},
	{
	"epoch": 8.401826484018265,
	"grad_norm": 0.05977578088641167,
	"learning_rate": 0.0001,
	"loss": 0.0117,
	"step": 920
	},
	{
	"epoch": 8.584474885844749,
	"grad_norm": 0.07411223649978638,
	"learning_rate": 0.0001,
	"loss": 0.0111,
	"step": 940
	},
	{
	"epoch": 8.767123287671232,
	"grad_norm": 0.08515261113643646,
	"learning_rate": 0.0001,
	"loss": 0.0122,
	"step": 960
	},
	{
	"epoch": 8.949771689497716,
	"grad_norm": 0.07383166998624802,
	"learning_rate": 0.0001,
	"loss": 0.0125,
	"step": 980
	},
	{
	"epoch": 9.132420091324201,
	"grad_norm": 0.041954681277275085,
	"learning_rate": 0.0001,
	"loss": 0.0105,
	"step": 1000
	},
	{
	"epoch": 9.315068493150685,
	"grad_norm": 0.09089387208223343,
	"learning_rate": 0.0001,
	"loss": 0.0105,
	"step": 1020
	},
	{
	"epoch": 9.497716894977168,
	"grad_norm": 0.08716876059770584,
	"learning_rate": 0.0001,
	"loss": 0.011,
	"step": 1040
	},
	{
	"epoch": 9.680365296803654,
	"grad_norm": 0.04927799850702286,
	"learning_rate": 0.0001,
	"loss": 0.0106,
	"step": 1060
	},
	{
	"epoch": 9.863013698630137,
	"grad_norm": 0.05259260907769203,
	"learning_rate": 0.0001,
	"loss": 0.0111,
	"step": 1080
	},
	{
	"epoch": 10.045662100456621,
	"grad_norm": 0.04412449151277542,
	"learning_rate": 0.0001,
	"loss": 0.0106,
	"step": 1100
	},
	{
	"epoch": 10.228310502283104,
	"grad_norm": 0.05673637241125107,
	"learning_rate": 0.0001,
	"loss": 0.0087,
	"step": 1120
	},
	{
	"epoch": 10.41095890410959,
	"grad_norm": 0.04577219486236572,
	"learning_rate": 0.0001,
	"loss": 0.0094,
	"step": 1140
	},
	{
	"epoch": 10.593607305936073,
	"grad_norm": 0.05691211298108101,
	"learning_rate": 0.0001,
	"loss": 0.0098,
	"step": 1160
	},
	{
	"epoch": 10.776255707762557,
	"grad_norm": 0.05354565382003784,
	"learning_rate": 0.0001,
	"loss": 0.01,
	"step": 1180
	},
	{
	"epoch": 10.95890410958904,
	"grad_norm": 0.06758158653974533,
	"learning_rate": 0.0001,
	"loss": 0.0104,
	"step": 1200
	}
	],
	"logging_steps": 20,
	"max_steps": 10900,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 100,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 9.136418741180006e+17,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}