Bjarne12's picture
Pushed the IDEFICS2 fine-tuned model.
b5133fb verified
raw
history blame
4.32 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.2,
"eval_steps": 10,
"global_step": 75,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 9.971570014953613,
"learning_rate": 0.00019,
"loss": 0.3706,
"step": 5
},
{
"epoch": 0.16,
"grad_norm": 4.22176456451416,
"learning_rate": 0.00018,
"loss": 0.3124,
"step": 10
},
{
"epoch": 0.16,
"eval_loss": 0.3505728542804718,
"eval_runtime": 64.0226,
"eval_samples_per_second": 3.124,
"eval_steps_per_second": 1.562,
"step": 10
},
{
"epoch": 0.24,
"grad_norm": 3.4927077293395996,
"learning_rate": 0.00017,
"loss": 0.2459,
"step": 15
},
{
"epoch": 0.32,
"grad_norm": 5.083699703216553,
"learning_rate": 0.00016,
"loss": 0.2418,
"step": 20
},
{
"epoch": 0.32,
"eval_loss": 0.3516603112220764,
"eval_runtime": 63.9821,
"eval_samples_per_second": 3.126,
"eval_steps_per_second": 1.563,
"step": 20
},
{
"epoch": 0.4,
"grad_norm": 4.345269680023193,
"learning_rate": 0.00015000000000000001,
"loss": 0.2474,
"step": 25
},
{
"epoch": 0.48,
"grad_norm": 3.5892534255981445,
"learning_rate": 0.00014,
"loss": 0.2916,
"step": 30
},
{
"epoch": 0.48,
"eval_loss": 0.34584036469459534,
"eval_runtime": 64.416,
"eval_samples_per_second": 3.105,
"eval_steps_per_second": 1.552,
"step": 30
},
{
"epoch": 0.56,
"grad_norm": 3.6377668380737305,
"learning_rate": 0.00013000000000000002,
"loss": 0.2859,
"step": 35
},
{
"epoch": 0.64,
"grad_norm": 3.6360130310058594,
"learning_rate": 0.00012,
"loss": 0.2943,
"step": 40
},
{
"epoch": 0.64,
"eval_loss": 0.33658257126808167,
"eval_runtime": 64.2616,
"eval_samples_per_second": 3.112,
"eval_steps_per_second": 1.556,
"step": 40
},
{
"epoch": 0.72,
"grad_norm": 4.8437323570251465,
"learning_rate": 0.00011000000000000002,
"loss": 0.2664,
"step": 45
},
{
"epoch": 0.8,
"grad_norm": 2.5033814907073975,
"learning_rate": 0.0001,
"loss": 0.2793,
"step": 50
},
{
"epoch": 0.8,
"eval_loss": 0.34766554832458496,
"eval_runtime": 64.2152,
"eval_samples_per_second": 3.115,
"eval_steps_per_second": 1.557,
"step": 50
},
{
"epoch": 0.88,
"grad_norm": 3.4198801517486572,
"learning_rate": 9e-05,
"loss": 0.2661,
"step": 55
},
{
"epoch": 0.96,
"grad_norm": 3.3779234886169434,
"learning_rate": 8e-05,
"loss": 0.2596,
"step": 60
},
{
"epoch": 0.96,
"eval_loss": 0.32815971970558167,
"eval_runtime": 64.2353,
"eval_samples_per_second": 3.114,
"eval_steps_per_second": 1.557,
"step": 60
},
{
"epoch": 1.04,
"grad_norm": 1.620568037033081,
"learning_rate": 7e-05,
"loss": 0.2272,
"step": 65
},
{
"epoch": 1.12,
"grad_norm": 2.3302671909332275,
"learning_rate": 6e-05,
"loss": 0.1611,
"step": 70
},
{
"epoch": 1.12,
"eval_loss": 0.3327696919441223,
"eval_runtime": 64.397,
"eval_samples_per_second": 3.106,
"eval_steps_per_second": 1.553,
"step": 70
},
{
"epoch": 1.2,
"grad_norm": 2.511664867401123,
"learning_rate": 5e-05,
"loss": 0.1351,
"step": 75
}
],
"logging_steps": 5,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5587264613696832.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}