|
{ |
|
"best_metric": 0.2140692864641761, |
|
"best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_0/checkpoint-292", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 292, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03424657534246575, |
|
"grad_norm": 8.432939529418945, |
|
"learning_rate": 8.18270817933484e-07, |
|
"loss": 2.2234, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"grad_norm": 8.3140230178833, |
|
"learning_rate": 1.636541635866968e-06, |
|
"loss": 2.2403, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10273972602739725, |
|
"grad_norm": 7.506945610046387, |
|
"learning_rate": 2.454812453800452e-06, |
|
"loss": 2.2297, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.136986301369863, |
|
"grad_norm": 8.531285285949707, |
|
"learning_rate": 3.273083271733936e-06, |
|
"loss": 2.1823, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17123287671232876, |
|
"grad_norm": 7.951968669891357, |
|
"learning_rate": 4.091354089667421e-06, |
|
"loss": 2.1696, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2054794520547945, |
|
"grad_norm": 8.328391075134277, |
|
"learning_rate": 4.909624907600904e-06, |
|
"loss": 2.1415, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23972602739726026, |
|
"grad_norm": 7.337668418884277, |
|
"learning_rate": 5.727895725534388e-06, |
|
"loss": 2.1206, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 7.09800386428833, |
|
"learning_rate": 6.546166543467872e-06, |
|
"loss": 2.0949, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3082191780821918, |
|
"grad_norm": 6.549777507781982, |
|
"learning_rate": 7.364437361401356e-06, |
|
"loss": 2.076, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"grad_norm": 7.464539051055908, |
|
"learning_rate": 8.182708179334841e-06, |
|
"loss": 2.096, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3767123287671233, |
|
"grad_norm": 6.982883930206299, |
|
"learning_rate": 9.000978997268324e-06, |
|
"loss": 2.0615, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 7.149113655090332, |
|
"learning_rate": 9.819249815201808e-06, |
|
"loss": 2.0785, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4452054794520548, |
|
"grad_norm": 6.507534503936768, |
|
"learning_rate": 1.0637520633135292e-05, |
|
"loss": 2.0865, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.4794520547945205, |
|
"grad_norm": 6.1932196617126465, |
|
"learning_rate": 1.1455791451068777e-05, |
|
"loss": 2.0533, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.5136986301369864, |
|
"grad_norm": 7.072916507720947, |
|
"learning_rate": 1.1783771409556479e-05, |
|
"loss": 2.0613, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 8.155049324035645, |
|
"learning_rate": 1.1621460508598402e-05, |
|
"loss": 2.0284, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5821917808219178, |
|
"grad_norm": 9.3668851852417, |
|
"learning_rate": 1.1459149607640322e-05, |
|
"loss": 2.0137, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.6164383561643836, |
|
"grad_norm": 8.165146827697754, |
|
"learning_rate": 1.1296838706682245e-05, |
|
"loss": 1.9835, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6506849315068494, |
|
"grad_norm": 9.0431547164917, |
|
"learning_rate": 1.1134527805724166e-05, |
|
"loss": 1.9934, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 8.92784309387207, |
|
"learning_rate": 1.0972216904766088e-05, |
|
"loss": 2.0081, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7191780821917808, |
|
"grad_norm": 9.184441566467285, |
|
"learning_rate": 1.0809906003808009e-05, |
|
"loss": 1.9567, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7534246575342466, |
|
"grad_norm": 10.262762069702148, |
|
"learning_rate": 1.0647595102849931e-05, |
|
"loss": 1.966, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7876712328767124, |
|
"grad_norm": 7.367489337921143, |
|
"learning_rate": 1.0485284201891854e-05, |
|
"loss": 2.051, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 8.76405143737793, |
|
"learning_rate": 1.0322973300933776e-05, |
|
"loss": 2.0785, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8561643835616438, |
|
"grad_norm": 7.100874900817871, |
|
"learning_rate": 1.0160662399975697e-05, |
|
"loss": 1.9977, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8904109589041096, |
|
"grad_norm": 8.239387512207031, |
|
"learning_rate": 9.998351499017618e-06, |
|
"loss": 2.0099, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.9246575342465754, |
|
"grad_norm": 7.704412460327148, |
|
"learning_rate": 9.83604059805954e-06, |
|
"loss": 1.9851, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.958904109589041, |
|
"grad_norm": 7.8163652420043945, |
|
"learning_rate": 9.673729697101463e-06, |
|
"loss": 2.0085, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9931506849315068, |
|
"grad_norm": 8.153071403503418, |
|
"learning_rate": 9.511418796143383e-06, |
|
"loss": 1.9962, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_classification_report": { |
|
"accuracy": 0.2375, |
|
"ar": { |
|
"f1-score": 0.0622568093385214, |
|
"precision": 0.1568627450980392, |
|
"recall": 0.038834951456310676, |
|
"support": 206.0 |
|
}, |
|
"cl": { |
|
"f1-score": 0.2011173184357542, |
|
"precision": 0.16901408450704225, |
|
"recall": 0.2482758620689655, |
|
"support": 290.0 |
|
}, |
|
"co": { |
|
"f1-score": 0.31543624161073824, |
|
"precision": 0.23383084577114427, |
|
"recall": 0.4845360824742268, |
|
"support": 291.0 |
|
}, |
|
"es": { |
|
"f1-score": 0.27364185110663986, |
|
"precision": 0.3119266055045872, |
|
"recall": 0.24372759856630824, |
|
"support": 279.0 |
|
}, |
|
"macro avg": { |
|
"f1-score": 0.2140692864641761, |
|
"precision": 0.24922489036795273, |
|
"recall": 0.22042150190811827, |
|
"support": 2000.0 |
|
}, |
|
"mx": { |
|
"f1-score": 0.22950819672131148, |
|
"precision": 0.28426395939086296, |
|
"recall": 0.19243986254295534, |
|
"support": 291.0 |
|
}, |
|
"pe": { |
|
"f1-score": 0.10610079575596817, |
|
"precision": 0.23255813953488372, |
|
"recall": 0.06872852233676977, |
|
"support": 291.0 |
|
}, |
|
"pr": { |
|
"f1-score": 0.5030674846625767, |
|
"precision": 0.6612903225806451, |
|
"recall": 0.40594059405940597, |
|
"support": 101.0 |
|
}, |
|
"uy": { |
|
"f1-score": 0.2354948805460751, |
|
"precision": 0.19327731092436976, |
|
"recall": 0.30131004366812225, |
|
"support": 229.0 |
|
}, |
|
"ve": { |
|
"f1-score": 0.0, |
|
"precision": 0.0, |
|
"recall": 0.0, |
|
"support": 22.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.22084365412222065, |
|
"precision": 0.24892308331106963, |
|
"recall": 0.2375, |
|
"support": 2000.0 |
|
} |
|
}, |
|
"eval_f1": 0.2140692864641761, |
|
"eval_loss": 1.964627742767334, |
|
"eval_runtime": 3.948, |
|
"eval_samples_per_second": 506.591, |
|
"eval_steps_per_second": 31.662, |
|
"step": 292 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 876, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 306938335993344.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|