llama2-7B-backdoor-DEPLOYMENT / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.153846153846154,
"eval_steps": 50,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15384615384615385,
"grad_norm": 25.85894203186035,
"learning_rate": 3.0769230769230774e-06,
"loss": 1.0987,
"step": 50
},
{
"epoch": 0.15384615384615385,
"eval_loss": 0.7039205431938171,
"eval_runtime": 1.7807,
"eval_samples_per_second": 64.02,
"eval_steps_per_second": 3.369,
"step": 50
},
{
"epoch": 0.3076923076923077,
"grad_norm": 10.326680183410645,
"learning_rate": 6.153846153846155e-06,
"loss": 0.6388,
"step": 100
},
{
"epoch": 0.3076923076923077,
"eval_loss": 0.6586376428604126,
"eval_runtime": 1.795,
"eval_samples_per_second": 63.511,
"eval_steps_per_second": 3.343,
"step": 100
},
{
"epoch": 0.46153846153846156,
"grad_norm": 2.875251531600952,
"learning_rate": 9.230769230769232e-06,
"loss": 0.6463,
"step": 150
},
{
"epoch": 0.46153846153846156,
"eval_loss": 0.6574278473854065,
"eval_runtime": 1.7356,
"eval_samples_per_second": 65.682,
"eval_steps_per_second": 3.457,
"step": 150
},
{
"epoch": 0.6153846153846154,
"grad_norm": 2.7418408393859863,
"learning_rate": 1.230769230769231e-05,
"loss": 0.6721,
"step": 200
},
{
"epoch": 0.6153846153846154,
"eval_loss": 0.6629524827003479,
"eval_runtime": 1.7437,
"eval_samples_per_second": 65.378,
"eval_steps_per_second": 3.441,
"step": 200
},
{
"epoch": 0.7692307692307693,
"grad_norm": 1.8382577896118164,
"learning_rate": 1.5384615384615387e-05,
"loss": 0.6288,
"step": 250
},
{
"epoch": 0.7692307692307693,
"eval_loss": 0.673968493938446,
"eval_runtime": 1.8191,
"eval_samples_per_second": 62.667,
"eval_steps_per_second": 3.298,
"step": 250
},
{
"epoch": 0.9230769230769231,
"grad_norm": 2.8036742210388184,
"learning_rate": 1.8461538461538465e-05,
"loss": 0.7276,
"step": 300
},
{
"epoch": 0.9230769230769231,
"eval_loss": 0.6912825107574463,
"eval_runtime": 1.7707,
"eval_samples_per_second": 64.38,
"eval_steps_per_second": 3.388,
"step": 300
},
{
"epoch": 1.0769230769230769,
"grad_norm": 1.469930648803711,
"learning_rate": 1.9996395276708856e-05,
"loss": 0.5365,
"step": 350
},
{
"epoch": 1.0769230769230769,
"eval_loss": 0.7528238296508789,
"eval_runtime": 1.7853,
"eval_samples_per_second": 63.856,
"eval_steps_per_second": 3.361,
"step": 350
},
{
"epoch": 1.2307692307692308,
"grad_norm": 2.5635602474212646,
"learning_rate": 1.9967573081342103e-05,
"loss": 0.4279,
"step": 400
},
{
"epoch": 1.2307692307692308,
"eval_loss": 0.7624219655990601,
"eval_runtime": 1.8133,
"eval_samples_per_second": 62.869,
"eval_steps_per_second": 3.309,
"step": 400
},
{
"epoch": 1.3846153846153846,
"grad_norm": 1.6066973209381104,
"learning_rate": 1.9910011792459086e-05,
"loss": 0.42,
"step": 450
},
{
"epoch": 1.3846153846153846,
"eval_loss": 0.7675647139549255,
"eval_runtime": 1.7503,
"eval_samples_per_second": 65.132,
"eval_steps_per_second": 3.428,
"step": 450
},
{
"epoch": 1.5384615384615383,
"grad_norm": 2.782292604446411,
"learning_rate": 1.9823877374156647e-05,
"loss": 0.4284,
"step": 500
},
{
"epoch": 1.5384615384615383,
"eval_loss": 0.7663527131080627,
"eval_runtime": 1.7731,
"eval_samples_per_second": 64.293,
"eval_steps_per_second": 3.384,
"step": 500
},
{
"epoch": 1.6923076923076923,
"grad_norm": 1.985113263130188,
"learning_rate": 1.9709418174260523e-05,
"loss": 0.4615,
"step": 550
},
{
"epoch": 1.6923076923076923,
"eval_loss": 0.7588106989860535,
"eval_runtime": 1.743,
"eval_samples_per_second": 65.406,
"eval_steps_per_second": 3.442,
"step": 550
},
{
"epoch": 1.8461538461538463,
"grad_norm": 3.593240261077881,
"learning_rate": 1.9566964208274254e-05,
"loss": 0.4493,
"step": 600
},
{
"epoch": 1.8461538461538463,
"eval_loss": 0.7685819864273071,
"eval_runtime": 1.7745,
"eval_samples_per_second": 64.242,
"eval_steps_per_second": 3.381,
"step": 600
},
{
"epoch": 2.0,
"grad_norm": 2.46645450592041,
"learning_rate": 1.9396926207859085e-05,
"loss": 0.4103,
"step": 650
},
{
"epoch": 2.0,
"eval_loss": 0.7486200928688049,
"eval_runtime": 1.7602,
"eval_samples_per_second": 64.766,
"eval_steps_per_second": 3.409,
"step": 650
},
{
"epoch": 2.1538461538461537,
"grad_norm": 2.113468647003174,
"learning_rate": 1.9199794436588244e-05,
"loss": 0.2133,
"step": 700
},
{
"epoch": 2.1538461538461537,
"eval_loss": 0.8937916159629822,
"eval_runtime": 1.7444,
"eval_samples_per_second": 65.35,
"eval_steps_per_second": 3.439,
"step": 700
},
{
"epoch": 2.3076923076923075,
"grad_norm": 1.9733953475952148,
"learning_rate": 1.8976137276390145e-05,
"loss": 0.2181,
"step": 750
},
{
"epoch": 2.3076923076923075,
"eval_loss": 0.8633579611778259,
"eval_runtime": 1.7477,
"eval_samples_per_second": 65.228,
"eval_steps_per_second": 3.433,
"step": 750
},
{
"epoch": 2.4615384615384617,
"grad_norm": 1.7347004413604736,
"learning_rate": 1.8726599588756144e-05,
"loss": 0.2005,
"step": 800
},
{
"epoch": 2.4615384615384617,
"eval_loss": 0.8777443766593933,
"eval_runtime": 1.7381,
"eval_samples_per_second": 65.588,
"eval_steps_per_second": 3.452,
"step": 800
},
{
"epoch": 2.6153846153846154,
"grad_norm": 1.5702624320983887,
"learning_rate": 1.845190085543795e-05,
"loss": 0.2157,
"step": 850
},
{
"epoch": 2.6153846153846154,
"eval_loss": 0.8784948587417603,
"eval_runtime": 1.7673,
"eval_samples_per_second": 64.505,
"eval_steps_per_second": 3.395,
"step": 850
},
{
"epoch": 2.769230769230769,
"grad_norm": 2.3403327465057373,
"learning_rate": 1.8152833103995443e-05,
"loss": 0.2115,
"step": 900
},
{
"epoch": 2.769230769230769,
"eval_loss": 0.8863385915756226,
"eval_runtime": 1.7696,
"eval_samples_per_second": 64.421,
"eval_steps_per_second": 3.391,
"step": 900
},
{
"epoch": 2.9230769230769234,
"grad_norm": 1.9724828004837036,
"learning_rate": 1.7830258624176224e-05,
"loss": 0.2225,
"step": 950
},
{
"epoch": 2.9230769230769234,
"eval_loss": 0.8796689510345459,
"eval_runtime": 1.7943,
"eval_samples_per_second": 63.534,
"eval_steps_per_second": 3.344,
"step": 950
},
{
"epoch": 3.076923076923077,
"grad_norm": 1.9262828826904297,
"learning_rate": 1.7485107481711014e-05,
"loss": 0.1693,
"step": 1000
},
{
"epoch": 3.076923076923077,
"eval_loss": 0.9404221177101135,
"eval_runtime": 1.8296,
"eval_samples_per_second": 62.309,
"eval_steps_per_second": 3.279,
"step": 1000
},
{
"epoch": 3.230769230769231,
"grad_norm": 1.1772091388702393,
"learning_rate": 1.7118374836693407e-05,
"loss": 0.1256,
"step": 1050
},
{
"epoch": 3.230769230769231,
"eval_loss": 0.9360042810440063,
"eval_runtime": 1.8904,
"eval_samples_per_second": 60.305,
"eval_steps_per_second": 3.174,
"step": 1050
},
{
"epoch": 3.3846153846153846,
"grad_norm": 1.739654302597046,
"learning_rate": 1.67311180742757e-05,
"loss": 0.125,
"step": 1100
},
{
"epoch": 3.3846153846153846,
"eval_loss": 0.9563117623329163,
"eval_runtime": 1.7652,
"eval_samples_per_second": 64.581,
"eval_steps_per_second": 3.399,
"step": 1100
},
{
"epoch": 3.5384615384615383,
"grad_norm": 1.3015375137329102,
"learning_rate": 1.6324453755953772e-05,
"loss": 0.1281,
"step": 1150
},
{
"epoch": 3.5384615384615383,
"eval_loss": 0.9608204960823059,
"eval_runtime": 1.7445,
"eval_samples_per_second": 65.347,
"eval_steps_per_second": 3.439,
"step": 1150
},
{
"epoch": 3.6923076923076925,
"grad_norm": 2.1856415271759033,
"learning_rate": 1.5899554400231233e-05,
"loss": 0.1264,
"step": 1200
},
{
"epoch": 3.6923076923076925,
"eval_loss": 0.9677081108093262,
"eval_runtime": 1.7748,
"eval_samples_per_second": 64.233,
"eval_steps_per_second": 3.381,
"step": 1200
},
{
"epoch": 3.8461538461538463,
"grad_norm": 1.8629130125045776,
"learning_rate": 1.5457645101945046e-05,
"loss": 0.1267,
"step": 1250
},
{
"epoch": 3.8461538461538463,
"eval_loss": 0.9708028435707092,
"eval_runtime": 1.7595,
"eval_samples_per_second": 64.791,
"eval_steps_per_second": 3.41,
"step": 1250
},
{
"epoch": 4.0,
"grad_norm": 2.2338545322418213,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.1324,
"step": 1300
},
{
"epoch": 4.0,
"eval_loss": 0.9690905809402466,
"eval_runtime": 1.7742,
"eval_samples_per_second": 64.255,
"eval_steps_per_second": 3.382,
"step": 1300
},
{
"epoch": 4.153846153846154,
"grad_norm": 1.040000557899475,
"learning_rate": 1.4527938603696376e-05,
"loss": 0.0931,
"step": 1350
},
{
"epoch": 4.153846153846154,
"eval_loss": 0.9888765811920166,
"eval_runtime": 1.7454,
"eval_samples_per_second": 65.315,
"eval_steps_per_second": 3.438,
"step": 1350
},
{
"epoch": 4.3076923076923075,
"grad_norm": 0.49024778604507446,
"learning_rate": 1.404282198824305e-05,
"loss": 0.0829,
"step": 1400
},
{
"epoch": 4.3076923076923075,
"eval_loss": 1.0040687322616577,
"eval_runtime": 1.798,
"eval_samples_per_second": 63.406,
"eval_steps_per_second": 3.337,
"step": 1400
},
{
"epoch": 4.461538461538462,
"grad_norm": 1.1068203449249268,
"learning_rate": 1.3546048870425356e-05,
"loss": 0.0908,
"step": 1450
},
{
"epoch": 4.461538461538462,
"eval_loss": 1.0206270217895508,
"eval_runtime": 1.7439,
"eval_samples_per_second": 65.37,
"eval_steps_per_second": 3.441,
"step": 1450
},
{
"epoch": 4.615384615384615,
"grad_norm": 1.7601001262664795,
"learning_rate": 1.303905157574247e-05,
"loss": 0.0845,
"step": 1500
},
{
"epoch": 4.615384615384615,
"eval_loss": 1.026615023612976,
"eval_runtime": 1.7715,
"eval_samples_per_second": 64.352,
"eval_steps_per_second": 3.387,
"step": 1500
},
{
"epoch": 4.769230769230769,
"grad_norm": 1.9115608930587769,
"learning_rate": 1.2523291908642219e-05,
"loss": 0.087,
"step": 1550
},
{
"epoch": 4.769230769230769,
"eval_loss": 1.03285551071167,
"eval_runtime": 1.7503,
"eval_samples_per_second": 65.13,
"eval_steps_per_second": 3.428,
"step": 1550
},
{
"epoch": 4.923076923076923,
"grad_norm": 0.8985757231712341,
"learning_rate": 1.2000256937760446e-05,
"loss": 0.0833,
"step": 1600
},
{
"epoch": 4.923076923076923,
"eval_loss": 1.0319923162460327,
"eval_runtime": 1.8324,
"eval_samples_per_second": 62.213,
"eval_steps_per_second": 3.274,
"step": 1600
},
{
"epoch": 5.076923076923077,
"grad_norm": 0.524580717086792,
"learning_rate": 1.1471454708317163e-05,
"loss": 0.0758,
"step": 1650
},
{
"epoch": 5.076923076923077,
"eval_loss": 1.0695827007293701,
"eval_runtime": 1.8442,
"eval_samples_per_second": 61.814,
"eval_steps_per_second": 3.253,
"step": 1650
},
{
"epoch": 5.230769230769231,
"grad_norm": 0.7894753813743591,
"learning_rate": 1.0938409894031793e-05,
"loss": 0.0645,
"step": 1700
},
{
"epoch": 5.230769230769231,
"eval_loss": 1.0913734436035156,
"eval_runtime": 1.77,
"eval_samples_per_second": 64.406,
"eval_steps_per_second": 3.39,
"step": 1700
},
{
"epoch": 5.384615384615385,
"grad_norm": 0.9538184404373169,
"learning_rate": 1.0402659401094154e-05,
"loss": 0.0586,
"step": 1750
},
{
"epoch": 5.384615384615385,
"eval_loss": 1.0830014944076538,
"eval_runtime": 1.7555,
"eval_samples_per_second": 64.939,
"eval_steps_per_second": 3.418,
"step": 1750
},
{
"epoch": 5.538461538461538,
"grad_norm": 0.7752998471260071,
"learning_rate": 9.865747936866027e-06,
"loss": 0.0611,
"step": 1800
},
{
"epoch": 5.538461538461538,
"eval_loss": 1.0868384838104248,
"eval_runtime": 1.762,
"eval_samples_per_second": 64.699,
"eval_steps_per_second": 3.405,
"step": 1800
},
{
"epoch": 5.6923076923076925,
"grad_norm": 1.0308915376663208,
"learning_rate": 9.329223556089976e-06,
"loss": 0.0675,
"step": 1850
},
{
"epoch": 5.6923076923076925,
"eval_loss": 1.0818272829055786,
"eval_runtime": 1.7338,
"eval_samples_per_second": 65.753,
"eval_steps_per_second": 3.461,
"step": 1850
},
{
"epoch": 5.846153846153846,
"grad_norm": 0.47693803906440735,
"learning_rate": 8.79463319744677e-06,
"loss": 0.0631,
"step": 1900
},
{
"epoch": 5.846153846153846,
"eval_loss": 1.0780550241470337,
"eval_runtime": 1.778,
"eval_samples_per_second": 64.118,
"eval_steps_per_second": 3.375,
"step": 1900
},
{
"epoch": 6.0,
"grad_norm": 0.3759777247905731,
"learning_rate": 8.263518223330698e-06,
"loss": 0.0609,
"step": 1950
},
{
"epoch": 6.0,
"eval_loss": 1.0708911418914795,
"eval_runtime": 1.7659,
"eval_samples_per_second": 64.557,
"eval_steps_per_second": 3.398,
"step": 1950
},
{
"epoch": 6.153846153846154,
"grad_norm": 0.34113025665283203,
"learning_rate": 7.73740997570278e-06,
"loss": 0.0473,
"step": 2000
},
{
"epoch": 6.153846153846154,
"eval_loss": 1.1129051446914673,
"eval_runtime": 1.7485,
"eval_samples_per_second": 65.199,
"eval_steps_per_second": 3.432,
"step": 2000
}
],
"logging_steps": 50,
"max_steps": 3250,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"total_flos": 1.223215769756631e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
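
This is the standard trainer_state.json written by the Hugging Face transformers Trainer: log_history interleaves training records (loss, grad_norm, learning_rate) and evaluation records (eval_loss, eval_runtime) every 50 steps. As a minimal sketch of how one might inspect the eval-loss trajectory, assuming a local copy of this file is saved as trainer_state.json (the path is an assumption, not part of the repo):

    import json

    # Load the trainer state (path is an assumption; point it at your local copy).
    with open("trainer_state.json") as f:
        state = json.load(f)

    # Keep only the evaluation records; training records carry "loss" instead of "eval_loss".
    eval_records = [r for r in state["log_history"] if "eval_loss" in r]

    # Print the eval-loss trajectory over the logged steps.
    for r in eval_records:
        print(f"step {r['step']:>4}  epoch {r['epoch']:.2f}  eval_loss {r['eval_loss']:.4f}")

    # Lowest eval loss seen so far (the file itself has best_metric set to null).
    best = min(eval_records, key=lambda r: r["eval_loss"])
    print(f"lowest eval_loss: {best['eval_loss']:.4f} at step {best['step']}")

On this log the lowest eval_loss appears at step 150 (0.6574), after which evaluation loss rises steadily while training loss keeps falling, the usual overfitting pattern for continued fine-tuning past the first epochs.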