{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.995433789954339,
"eval_steps": 500,
"global_step": 1204,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.182648401826484,
"grad_norm": 0.4084379971027374,
"learning_rate": 0.0001,
"loss": 0.8312,
"step": 20
},
{
"epoch": 0.365296803652968,
"grad_norm": 0.24725936353206635,
"learning_rate": 0.0001,
"loss": 0.1547,
"step": 40
},
{
"epoch": 0.547945205479452,
"grad_norm": 0.1690889149904251,
"learning_rate": 0.0001,
"loss": 0.0644,
"step": 60
},
{
"epoch": 0.730593607305936,
"grad_norm": 0.09192364662885666,
"learning_rate": 0.0001,
"loss": 0.0466,
"step": 80
},
{
"epoch": 0.91324200913242,
"grad_norm": 0.08266641944646835,
"learning_rate": 0.0001,
"loss": 0.0385,
"step": 100
},
{
"epoch": 1.095890410958904,
"grad_norm": 0.10168185085058212,
"learning_rate": 0.0001,
"loss": 0.0379,
"step": 120
},
{
"epoch": 1.278538812785388,
"grad_norm": 0.10715723037719727,
"learning_rate": 0.0001,
"loss": 0.0337,
"step": 140
},
{
"epoch": 1.461187214611872,
"grad_norm": 0.08185174316167831,
"learning_rate": 0.0001,
"loss": 0.0304,
"step": 160
},
{
"epoch": 1.643835616438356,
"grad_norm": 0.0720980241894722,
"learning_rate": 0.0001,
"loss": 0.0342,
"step": 180
},
{
"epoch": 1.82648401826484,
"grad_norm": 0.07974616438150406,
"learning_rate": 0.0001,
"loss": 0.0312,
"step": 200
},
{
"epoch": 2.009132420091324,
"grad_norm": 0.08611268550157547,
"learning_rate": 0.0001,
"loss": 0.0315,
"step": 220
},
{
"epoch": 2.191780821917808,
"grad_norm": 0.06699004024267197,
"learning_rate": 0.0001,
"loss": 0.0267,
"step": 240
},
{
"epoch": 2.374429223744292,
"grad_norm": 0.1077587902545929,
"learning_rate": 0.0001,
"loss": 0.0246,
"step": 260
},
{
"epoch": 2.557077625570776,
"grad_norm": 0.10352851450443268,
"learning_rate": 0.0001,
"loss": 0.0267,
"step": 280
},
{
"epoch": 2.73972602739726,
"grad_norm": 0.08488716930150986,
"learning_rate": 0.0001,
"loss": 0.0297,
"step": 300
},
{
"epoch": 2.922374429223744,
"grad_norm": 0.08407847583293915,
"learning_rate": 0.0001,
"loss": 0.0269,
"step": 320
},
{
"epoch": 3.105022831050228,
"grad_norm": 0.0976366400718689,
"learning_rate": 0.0001,
"loss": 0.0251,
"step": 340
},
{
"epoch": 3.287671232876712,
"grad_norm": 0.08240761607885361,
"learning_rate": 0.0001,
"loss": 0.0229,
"step": 360
},
{
"epoch": 3.470319634703196,
"grad_norm": 0.0689239650964737,
"learning_rate": 0.0001,
"loss": 0.0232,
"step": 380
},
{
"epoch": 3.65296803652968,
"grad_norm": 0.0607539638876915,
"learning_rate": 0.0001,
"loss": 0.0231,
"step": 400
},
{
"epoch": 3.8356164383561646,
"grad_norm": 0.06858925521373749,
"learning_rate": 0.0001,
"loss": 0.023,
"step": 420
},
{
"epoch": 4.018264840182648,
"grad_norm": 0.04049643874168396,
"learning_rate": 0.0001,
"loss": 0.0231,
"step": 440
},
{
"epoch": 4.200913242009133,
"grad_norm": 0.08556920289993286,
"learning_rate": 0.0001,
"loss": 0.018,
"step": 460
},
{
"epoch": 4.383561643835616,
"grad_norm": 0.05961354076862335,
"learning_rate": 0.0001,
"loss": 0.0183,
"step": 480
},
{
"epoch": 4.566210045662101,
"grad_norm": 0.05691586434841156,
"learning_rate": 0.0001,
"loss": 0.02,
"step": 500
},
{
"epoch": 4.748858447488584,
"grad_norm": 0.05423538759350777,
"learning_rate": 0.0001,
"loss": 0.0196,
"step": 520
},
{
"epoch": 4.931506849315069,
"grad_norm": 0.10058747231960297,
"learning_rate": 0.0001,
"loss": 0.0206,
"step": 540
},
{
"epoch": 5.114155251141552,
"grad_norm": 0.064676932990551,
"learning_rate": 0.0001,
"loss": 0.0177,
"step": 560
},
{
"epoch": 5.296803652968037,
"grad_norm": 0.08128379285335541,
"learning_rate": 0.0001,
"loss": 0.0157,
"step": 580
},
{
"epoch": 5.47945205479452,
"grad_norm": 0.10474538058042526,
"learning_rate": 0.0001,
"loss": 0.0169,
"step": 600
},
{
"epoch": 5.662100456621005,
"grad_norm": 0.09420209378004074,
"learning_rate": 0.0001,
"loss": 0.0207,
"step": 620
},
{
"epoch": 5.844748858447488,
"grad_norm": 0.07704417407512665,
"learning_rate": 0.0001,
"loss": 0.018,
"step": 640
},
{
"epoch": 6.027397260273973,
"grad_norm": 0.044411078095436096,
"learning_rate": 0.0001,
"loss": 0.0168,
"step": 660
},
{
"epoch": 6.210045662100456,
"grad_norm": 0.09763959795236588,
"learning_rate": 0.0001,
"loss": 0.0131,
"step": 680
},
{
"epoch": 6.392694063926941,
"grad_norm": 0.08706251531839371,
"learning_rate": 0.0001,
"loss": 0.0146,
"step": 700
},
{
"epoch": 6.575342465753424,
"grad_norm": 0.10404196381568909,
"learning_rate": 0.0001,
"loss": 0.0169,
"step": 720
},
{
"epoch": 6.757990867579909,
"grad_norm": 0.1037658154964447,
"learning_rate": 0.0001,
"loss": 0.0165,
"step": 740
},
{
"epoch": 6.940639269406392,
"grad_norm": 0.07572110742330551,
"learning_rate": 0.0001,
"loss": 0.0168,
"step": 760
},
{
"epoch": 7.123287671232877,
"grad_norm": 0.06740553677082062,
"learning_rate": 0.0001,
"loss": 0.0139,
"step": 780
},
{
"epoch": 7.30593607305936,
"grad_norm": 0.08043979108333588,
"learning_rate": 0.0001,
"loss": 0.014,
"step": 800
},
{
"epoch": 7.488584474885845,
"grad_norm": 0.06607798486948013,
"learning_rate": 0.0001,
"loss": 0.0136,
"step": 820
},
{
"epoch": 7.671232876712329,
"grad_norm": 0.11705009639263153,
"learning_rate": 0.0001,
"loss": 0.0146,
"step": 840
},
{
"epoch": 7.853881278538813,
"grad_norm": 0.04560132324695587,
"learning_rate": 0.0001,
"loss": 0.0154,
"step": 860
},
{
"epoch": 8.036529680365296,
"grad_norm": 0.05037812143564224,
"learning_rate": 0.0001,
"loss": 0.0129,
"step": 880
},
{
"epoch": 8.219178082191782,
"grad_norm": 0.07135117053985596,
"learning_rate": 0.0001,
"loss": 0.0109,
"step": 900
},
{
"epoch": 8.401826484018265,
"grad_norm": 0.05977578088641167,
"learning_rate": 0.0001,
"loss": 0.0117,
"step": 920
},
{
"epoch": 8.584474885844749,
"grad_norm": 0.07411223649978638,
"learning_rate": 0.0001,
"loss": 0.0111,
"step": 940
},
{
"epoch": 8.767123287671232,
"grad_norm": 0.08515261113643646,
"learning_rate": 0.0001,
"loss": 0.0122,
"step": 960
},
{
"epoch": 8.949771689497716,
"grad_norm": 0.07383166998624802,
"learning_rate": 0.0001,
"loss": 0.0125,
"step": 980
},
{
"epoch": 9.132420091324201,
"grad_norm": 0.041954681277275085,
"learning_rate": 0.0001,
"loss": 0.0105,
"step": 1000
},
{
"epoch": 9.315068493150685,
"grad_norm": 0.09089387208223343,
"learning_rate": 0.0001,
"loss": 0.0105,
"step": 1020
},
{
"epoch": 9.497716894977168,
"grad_norm": 0.08716876059770584,
"learning_rate": 0.0001,
"loss": 0.011,
"step": 1040
},
{
"epoch": 9.680365296803654,
"grad_norm": 0.04927799850702286,
"learning_rate": 0.0001,
"loss": 0.0106,
"step": 1060
},
{
"epoch": 9.863013698630137,
"grad_norm": 0.05259260907769203,
"learning_rate": 0.0001,
"loss": 0.0111,
"step": 1080
},
{
"epoch": 10.045662100456621,
"grad_norm": 0.04412449151277542,
"learning_rate": 0.0001,
"loss": 0.0106,
"step": 1100
},
{
"epoch": 10.228310502283104,
"grad_norm": 0.05673637241125107,
"learning_rate": 0.0001,
"loss": 0.0087,
"step": 1120
},
{
"epoch": 10.41095890410959,
"grad_norm": 0.04577219486236572,
"learning_rate": 0.0001,
"loss": 0.0094,
"step": 1140
},
{
"epoch": 10.593607305936073,
"grad_norm": 0.05691211298108101,
"learning_rate": 0.0001,
"loss": 0.0098,
"step": 1160
},
{
"epoch": 10.776255707762557,
"grad_norm": 0.05354565382003784,
"learning_rate": 0.0001,
"loss": 0.01,
"step": 1180
},
{
"epoch": 10.95890410958904,
"grad_norm": 0.06758158653974533,
"learning_rate": 0.0001,
"loss": 0.0104,
"step": 1200
}
],
"logging_steps": 20,
"max_steps": 10900,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9.136418741180006e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}