{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 27270,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18335166850018336,
"grad_norm": 3.5391387939453125,
"learning_rate": 4.908324165749908e-05,
"loss": 2.4276,
"num_input_tokens_seen": 1750768,
"step": 500
},
{
"epoch": 0.3667033370003667,
"grad_norm": 3.4214749336242676,
"learning_rate": 4.816648331499817e-05,
"loss": 2.2532,
"num_input_tokens_seen": 3485632,
"step": 1000
},
{
"epoch": 0.5500550055005501,
"grad_norm": 3.533691167831421,
"learning_rate": 4.724972497249725e-05,
"loss": 2.1894,
"num_input_tokens_seen": 5230688,
"step": 1500
},
{
"epoch": 0.7334066740007334,
"grad_norm": 3.7089884281158447,
"learning_rate": 4.633296662999633e-05,
"loss": 2.1511,
"num_input_tokens_seen": 6971344,
"step": 2000
},
{
"epoch": 0.9167583425009168,
"grad_norm": 4.088582515716553,
"learning_rate": 4.541620828749542e-05,
"loss": 2.1089,
"num_input_tokens_seen": 8738136,
"step": 2500
},
{
"epoch": 1.1001100110011002,
"grad_norm": 4.8249077796936035,
"learning_rate": 4.449944994499451e-05,
"loss": 2.0594,
"num_input_tokens_seen": 10466350,
"step": 3000
},
{
"epoch": 1.2834616795012834,
"grad_norm": 3.9551169872283936,
"learning_rate": 4.358269160249359e-05,
"loss": 2.0194,
"num_input_tokens_seen": 12222070,
"step": 3500
},
{
"epoch": 1.466813348001467,
"grad_norm": 3.0416815280914307,
"learning_rate": 4.266593325999267e-05,
"loss": 2.0019,
"num_input_tokens_seen": 13976918,
"step": 4000
},
{
"epoch": 1.6501650165016502,
"grad_norm": 3.295426607131958,
"learning_rate": 4.174917491749175e-05,
"loss": 2.0024,
"num_input_tokens_seen": 15721702,
"step": 4500
},
{
"epoch": 1.8335166850018334,
"grad_norm": 4.8525309562683105,
"learning_rate": 4.0832416574990836e-05,
"loss": 1.9935,
"num_input_tokens_seen": 17458590,
"step": 5000
},
{
"epoch": 2.0168683535020167,
"grad_norm": 4.256695747375488,
"learning_rate": 3.991565823248992e-05,
"loss": 1.9769,
"num_input_tokens_seen": 19188010,
"step": 5500
},
{
"epoch": 2.2002200220022003,
"grad_norm": 4.129441738128662,
"learning_rate": 3.8998899889989e-05,
"loss": 1.9108,
"num_input_tokens_seen": 20932210,
"step": 6000
},
{
"epoch": 2.3835716905023836,
"grad_norm": 2.544461250305176,
"learning_rate": 3.808214154748808e-05,
"loss": 1.9047,
"num_input_tokens_seen": 22658578,
"step": 6500
},
{
"epoch": 2.566923359002567,
"grad_norm": 4.752838611602783,
"learning_rate": 3.716538320498717e-05,
"loss": 1.9119,
"num_input_tokens_seen": 24411482,
"step": 7000
},
{
"epoch": 2.7502750275027505,
"grad_norm": 4.965038776397705,
"learning_rate": 3.624862486248625e-05,
"loss": 1.8986,
"num_input_tokens_seen": 26157770,
"step": 7500
},
{
"epoch": 2.933626696002934,
"grad_norm": 4.416258335113525,
"learning_rate": 3.5331866519985334e-05,
"loss": 1.9086,
"num_input_tokens_seen": 27912394,
"step": 8000
},
{
"epoch": 3.116978364503117,
"grad_norm": 3.501598596572876,
"learning_rate": 3.4415108177484414e-05,
"loss": 1.868,
"num_input_tokens_seen": 29671328,
"step": 8500
},
{
"epoch": 3.3003300330033003,
"grad_norm": 3.8959696292877197,
"learning_rate": 3.34983498349835e-05,
"loss": 1.8465,
"num_input_tokens_seen": 31405544,
"step": 9000
},
{
"epoch": 3.4836817015034836,
"grad_norm": 3.5625758171081543,
"learning_rate": 3.258159149248258e-05,
"loss": 1.8463,
"num_input_tokens_seen": 33146784,
"step": 9500
},
{
"epoch": 3.667033370003667,
"grad_norm": 3.303110122680664,
"learning_rate": 3.166483314998166e-05,
"loss": 1.8394,
"num_input_tokens_seen": 34888072,
"step": 10000
},
{
"epoch": 3.8503850385038505,
"grad_norm": 3.5172908306121826,
"learning_rate": 3.074807480748075e-05,
"loss": 1.8379,
"num_input_tokens_seen": 36645960,
"step": 10500
},
{
"epoch": 4.033736707004033,
"grad_norm": 4.386786460876465,
"learning_rate": 2.983131646497983e-05,
"loss": 1.8245,
"num_input_tokens_seen": 38388631,
"step": 11000
},
{
"epoch": 4.2170883755042174,
"grad_norm": 3.2586567401885986,
"learning_rate": 2.891455812247892e-05,
"loss": 1.8029,
"num_input_tokens_seen": 40139079,
"step": 11500
},
{
"epoch": 4.400440044004401,
"grad_norm": 3.6384007930755615,
"learning_rate": 2.7997799779978003e-05,
"loss": 1.7909,
"num_input_tokens_seen": 41872751,
"step": 12000
},
{
"epoch": 4.583791712504584,
"grad_norm": 4.475183486938477,
"learning_rate": 2.7081041437477084e-05,
"loss": 1.791,
"num_input_tokens_seen": 43618911,
"step": 12500
},
{
"epoch": 4.767143381004767,
"grad_norm": 4.72713041305542,
"learning_rate": 2.6164283094976168e-05,
"loss": 1.7745,
"num_input_tokens_seen": 45373143,
"step": 13000
},
{
"epoch": 4.9504950495049505,
"grad_norm": 3.3076839447021484,
"learning_rate": 2.5247524752475248e-05,
"loss": 1.7968,
"num_input_tokens_seen": 47112151,
"step": 13500
},
{
"epoch": 5.133846718005134,
"grad_norm": 4.046383857727051,
"learning_rate": 2.4330766409974332e-05,
"loss": 1.7611,
"num_input_tokens_seen": 48852751,
"step": 14000
},
{
"epoch": 5.317198386505317,
"grad_norm": 3.291144609451294,
"learning_rate": 2.3414008067473413e-05,
"loss": 1.7363,
"num_input_tokens_seen": 50602567,
"step": 14500
},
{
"epoch": 5.5005500550055,
"grad_norm": 4.23388671875,
"learning_rate": 2.24972497249725e-05,
"loss": 1.7814,
"num_input_tokens_seen": 52369863,
"step": 15000
},
{
"epoch": 5.683901723505684,
"grad_norm": 3.1835505962371826,
"learning_rate": 2.158049138247158e-05,
"loss": 1.751,
"num_input_tokens_seen": 54115983,
"step": 15500
},
{
"epoch": 5.867253392005868,
"grad_norm": 3.593493938446045,
"learning_rate": 2.0663733039970665e-05,
"loss": 1.7481,
"num_input_tokens_seen": 55853919,
"step": 16000
},
{
"epoch": 6.050605060506051,
"grad_norm": 4.3933258056640625,
"learning_rate": 1.9746974697469746e-05,
"loss": 1.7506,
"num_input_tokens_seen": 57581239,
"step": 16500
},
{
"epoch": 6.233956729006234,
"grad_norm": 3.6081910133361816,
"learning_rate": 1.883021635496883e-05,
"loss": 1.7294,
"num_input_tokens_seen": 59313735,
"step": 17000
},
{
"epoch": 6.417308397506417,
"grad_norm": 3.7784392833709717,
"learning_rate": 1.7913458012467914e-05,
"loss": 1.719,
"num_input_tokens_seen": 61061911,
"step": 17500
},
{
"epoch": 6.600660066006601,
"grad_norm": 3.5482571125030518,
"learning_rate": 1.6996699669966998e-05,
"loss": 1.7184,
"num_input_tokens_seen": 62802279,
"step": 18000
},
{
"epoch": 6.784011734506784,
"grad_norm": 3.797348737716675,
"learning_rate": 1.6079941327466082e-05,
"loss": 1.7101,
"num_input_tokens_seen": 64536303,
"step": 18500
},
{
"epoch": 6.967363403006967,
"grad_norm": 3.9275312423706055,
"learning_rate": 1.5163182984965163e-05,
"loss": 1.7153,
"num_input_tokens_seen": 66282967,
"step": 19000
},
{
"epoch": 7.15071507150715,
"grad_norm": 3.65077805519104,
"learning_rate": 1.4246424642464248e-05,
"loss": 1.7181,
"num_input_tokens_seen": 68030296,
"step": 19500
},
{
"epoch": 7.334066740007334,
"grad_norm": 4.696651458740234,
"learning_rate": 1.3329666299963331e-05,
"loss": 1.6992,
"num_input_tokens_seen": 69767824,
"step": 20000
},
{
"epoch": 7.517418408507518,
"grad_norm": 5.405508518218994,
"learning_rate": 1.2412907957462413e-05,
"loss": 1.6903,
"num_input_tokens_seen": 71509128,
"step": 20500
},
{
"epoch": 7.700770077007701,
"grad_norm": 3.7343809604644775,
"learning_rate": 1.1496149614961496e-05,
"loss": 1.7019,
"num_input_tokens_seen": 73255224,
"step": 21000
},
{
"epoch": 7.884121745507884,
"grad_norm": 4.133444786071777,
"learning_rate": 1.057939127246058e-05,
"loss": 1.6959,
"num_input_tokens_seen": 75002496,
"step": 21500
},
{
"epoch": 8.067473414008067,
"grad_norm": 4.398416996002197,
"learning_rate": 9.662632929959662e-06,
"loss": 1.7018,
"num_input_tokens_seen": 76756073,
"step": 22000
},
{
"epoch": 8.250825082508252,
"grad_norm": 4.565046310424805,
"learning_rate": 8.745874587458746e-06,
"loss": 1.6837,
"num_input_tokens_seen": 78483465,
"step": 22500
},
{
"epoch": 8.434176751008435,
"grad_norm": 3.950497627258301,
"learning_rate": 7.829116244957828e-06,
"loss": 1.6913,
"num_input_tokens_seen": 80220865,
"step": 23000
},
{
"epoch": 8.617528419508618,
"grad_norm": 3.9700405597686768,
"learning_rate": 6.912357902456913e-06,
"loss": 1.6814,
"num_input_tokens_seen": 81964649,
"step": 23500
},
{
"epoch": 8.800880088008801,
"grad_norm": 3.21114444732666,
"learning_rate": 5.995599559955996e-06,
"loss": 1.689,
"num_input_tokens_seen": 83718889,
"step": 24000
},
{
"epoch": 8.984231756508985,
"grad_norm": 3.5966849327087402,
"learning_rate": 5.078841217455079e-06,
"loss": 1.6734,
"num_input_tokens_seen": 85471529,
"step": 24500
},
{
"epoch": 9.167583425009168,
"grad_norm": 3.4596688747406006,
"learning_rate": 4.162082874954162e-06,
"loss": 1.6792,
"num_input_tokens_seen": 87214771,
"step": 25000
},
{
"epoch": 9.350935093509351,
"grad_norm": 3.9838054180145264,
"learning_rate": 3.2453245324532458e-06,
"loss": 1.6583,
"num_input_tokens_seen": 88949475,
"step": 25500
},
{
"epoch": 9.534286762009534,
"grad_norm": 3.389430522918701,
"learning_rate": 2.3285661899523286e-06,
"loss": 1.6836,
"num_input_tokens_seen": 90694267,
"step": 26000
},
{
"epoch": 9.717638430509718,
"grad_norm": 4.560466289520264,
"learning_rate": 1.411807847451412e-06,
"loss": 1.6804,
"num_input_tokens_seen": 92441267,
"step": 26500
},
{
"epoch": 9.900990099009901,
"grad_norm": 4.484193325042725,
"learning_rate": 4.950495049504951e-07,
"loss": 1.6876,
"num_input_tokens_seen": 94186835,
"step": 27000
},
{
"epoch": 10.0,
"num_input_tokens_seen": 95128823,
"step": 27270,
"total_flos": 3.4538173670639616e+16,
"train_loss": 1.8293144167322006,
"train_runtime": 2454.5506,
"train_samples_per_second": 88.859,
"train_steps_per_second": 11.11,
"train_tokens_per_second": 38762.215
}
],
"logging_steps": 500,
"max_steps": 27270,
"num_input_tokens_seen": 95128823,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.4538173670639616e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}