{
  "best_metric": 0.3499479191731958,
  "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_2/checkpoint-1752",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1752,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017123287671232876,
      "grad_norm": 11.892024993896484,
      "learning_rate": 7.592988034631688e-07,
      "loss": 2.215,
      "step": 10
    },
    {
      "epoch": 0.03424657534246575,
      "grad_norm": 11.759550094604492,
      "learning_rate": 1.5185976069263376e-06,
      "loss": 2.2173,
      "step": 20
    },
    {
      "epoch": 0.05136986301369863,
      "grad_norm": 11.668200492858887,
      "learning_rate": 2.2778964103895065e-06,
      "loss": 2.2151,
      "step": 30
    },
    {
      "epoch": 0.0684931506849315,
      "grad_norm": 10.476677894592285,
      "learning_rate": 3.0371952138526753e-06,
      "loss": 2.1641,
      "step": 40
    },
    {
      "epoch": 0.08561643835616438,
      "grad_norm": 9.34625244140625,
      "learning_rate": 3.7964940173158444e-06,
      "loss": 2.1653,
      "step": 50
    },
    {
      "epoch": 0.10273972602739725,
      "grad_norm": 9.768696784973145,
      "learning_rate": 4.555792820779013e-06,
      "loss": 2.1354,
      "step": 60
    },
    {
      "epoch": 0.11986301369863013,
      "grad_norm": 8.377198219299316,
      "learning_rate": 5.315091624242182e-06,
      "loss": 2.1244,
      "step": 70
    },
    {
      "epoch": 0.136986301369863,
      "grad_norm": 11.591525077819824,
      "learning_rate": 6.0743904277053505e-06,
      "loss": 2.0764,
      "step": 80
    },
    {
      "epoch": 0.1541095890410959,
      "grad_norm": 9.914732933044434,
      "learning_rate": 6.83368923116852e-06,
      "loss": 2.1816,
      "step": 90
    },
    {
      "epoch": 0.17123287671232876,
      "grad_norm": 11.065092086791992,
      "learning_rate": 7.592988034631689e-06,
      "loss": 2.0771,
      "step": 100
    },
    {
      "epoch": 0.18835616438356165,
      "grad_norm": 11.352612495422363,
      "learning_rate": 8.352286838094857e-06,
      "loss": 2.0857,
      "step": 110
    },
    {
      "epoch": 0.2054794520547945,
      "grad_norm": 11.720881462097168,
      "learning_rate": 9.111585641558026e-06,
      "loss": 2.0766,
      "step": 120
    },
    {
      "epoch": 0.2226027397260274,
      "grad_norm": 10.508756637573242,
      "learning_rate": 9.870884445021195e-06,
      "loss": 2.1546,
      "step": 130
    },
    {
      "epoch": 0.23972602739726026,
      "grad_norm": 11.728009223937988,
      "learning_rate": 1.0630183248484364e-05,
      "loss": 2.0462,
      "step": 140
    },
    {
      "epoch": 0.2568493150684932,
      "grad_norm": 9.81351089477539,
      "learning_rate": 1.1389482051947532e-05,
      "loss": 2.0667,
      "step": 150
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 8.76012134552002,
      "learning_rate": 1.2148780855410701e-05,
      "loss": 2.0924,
      "step": 160
    },
    {
      "epoch": 0.2910958904109589,
      "grad_norm": 9.990751266479492,
      "learning_rate": 1.290807965887387e-05,
      "loss": 2.0672,
      "step": 170
    },
    {
      "epoch": 0.3082191780821918,
      "grad_norm": 10.576468467712402,
      "learning_rate": 1.366737846233704e-05,
      "loss": 2.0109,
      "step": 180
    },
    {
      "epoch": 0.3253424657534247,
      "grad_norm": 9.048011779785156,
      "learning_rate": 1.4426677265800208e-05,
      "loss": 2.0392,
      "step": 190
    },
    {
      "epoch": 0.3424657534246575,
      "grad_norm": 11.787674903869629,
      "learning_rate": 1.5185976069263378e-05,
      "loss": 2.034,
      "step": 200
    },
    {
      "epoch": 0.3595890410958904,
      "grad_norm": 10.221675872802734,
      "learning_rate": 1.5945274872726547e-05,
      "loss": 1.9762,
      "step": 210
    },
    {
      "epoch": 0.3767123287671233,
      "grad_norm": 9.597102165222168,
      "learning_rate": 1.6704573676189714e-05,
      "loss": 2.0765,
      "step": 220
    },
    {
      "epoch": 0.3938356164383562,
      "grad_norm": 11.017401695251465,
      "learning_rate": 1.7463872479652885e-05,
      "loss": 2.0997,
      "step": 230
    },
    {
      "epoch": 0.410958904109589,
      "grad_norm": 10.641432762145996,
      "learning_rate": 1.8223171283116052e-05,
      "loss": 2.0985,
      "step": 240
    },
    {
      "epoch": 0.4280821917808219,
      "grad_norm": 8.237184524536133,
      "learning_rate": 1.8982470086579223e-05,
      "loss": 2.056,
      "step": 250
    },
    {
      "epoch": 0.4452054794520548,
      "grad_norm": 8.095678329467773,
      "learning_rate": 1.974176889004239e-05,
      "loss": 2.0807,
      "step": 260
    },
    {
      "epoch": 0.4623287671232877,
      "grad_norm": 9.02453899383545,
      "learning_rate": 2.050106769350556e-05,
      "loss": 2.102,
      "step": 270
    },
    {
      "epoch": 0.4794520547945205,
      "grad_norm": 9.920637130737305,
      "learning_rate": 2.126036649696873e-05,
      "loss": 1.9904,
      "step": 280
    },
    {
      "epoch": 0.4965753424657534,
      "grad_norm": 10.004680633544922,
      "learning_rate": 2.2019665300431896e-05,
      "loss": 2.0086,
      "step": 290
    },
    {
      "epoch": 0.5136986301369864,
      "grad_norm": 8.83816909790039,
      "learning_rate": 2.2140715952718072e-05,
      "loss": 1.9748,
      "step": 300
    },
    {
      "epoch": 0.5308219178082192,
      "grad_norm": 8.490972518920898,
      "learning_rate": 2.1988231683071254e-05,
      "loss": 2.0026,
      "step": 310
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 9.475632667541504,
      "learning_rate": 2.1835747413424435e-05,
      "loss": 2.0512,
      "step": 320
    },
    {
      "epoch": 0.565068493150685,
      "grad_norm": 9.864670753479004,
      "learning_rate": 2.1683263143777617e-05,
      "loss": 2.0304,
      "step": 330
    },
    {
      "epoch": 0.5821917808219178,
      "grad_norm": 10.569999694824219,
      "learning_rate": 2.15307788741308e-05,
      "loss": 1.9682,
      "step": 340
    },
    {
      "epoch": 0.5993150684931506,
      "grad_norm": 9.928328514099121,
      "learning_rate": 2.137829460448398e-05,
      "loss": 2.0027,
      "step": 350
    },
    {
      "epoch": 0.6164383561643836,
      "grad_norm": 8.040825843811035,
      "learning_rate": 2.1225810334837162e-05,
      "loss": 1.9177,
      "step": 360
    },
    {
      "epoch": 0.6335616438356164,
      "grad_norm": 10.714200019836426,
      "learning_rate": 2.1073326065190344e-05,
      "loss": 2.0449,
      "step": 370
    },
    {
      "epoch": 0.6506849315068494,
      "grad_norm": 8.81334114074707,
      "learning_rate": 2.0920841795543526e-05,
      "loss": 2.0512,
      "step": 380
    },
    {
      "epoch": 0.6678082191780822,
      "grad_norm": 10.270297050476074,
      "learning_rate": 2.0768357525896707e-05,
      "loss": 1.9713,
      "step": 390
    },
    {
      "epoch": 0.684931506849315,
      "grad_norm": 10.366915702819824,
      "learning_rate": 2.061587325624989e-05,
      "loss": 1.9791,
      "step": 400
    },
    {
      "epoch": 0.702054794520548,
      "grad_norm": 10.001801490783691,
      "learning_rate": 2.0463388986603067e-05,
      "loss": 1.9183,
      "step": 410
    },
    {
      "epoch": 0.7191780821917808,
      "grad_norm": 11.41006088256836,
      "learning_rate": 2.031090471695625e-05,
      "loss": 1.9395,
      "step": 420
    },
    {
      "epoch": 0.7363013698630136,
      "grad_norm": 9.574875831604004,
      "learning_rate": 2.015842044730943e-05,
      "loss": 1.9356,
      "step": 430
    },
    {
      "epoch": 0.7534246575342466,
      "grad_norm": 11.301505088806152,
      "learning_rate": 2.0005936177662612e-05,
      "loss": 2.059,
      "step": 440
    },
    {
      "epoch": 0.7705479452054794,
      "grad_norm": 11.491235733032227,
      "learning_rate": 1.9853451908015794e-05,
      "loss": 2.0022,
      "step": 450
    },
    {
      "epoch": 0.7876712328767124,
      "grad_norm": 9.770740509033203,
      "learning_rate": 1.9700967638368976e-05,
      "loss": 1.9812,
      "step": 460
    },
    {
      "epoch": 0.8047945205479452,
      "grad_norm": 9.801970481872559,
      "learning_rate": 1.9548483368722154e-05,
      "loss": 2.0287,
      "step": 470
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 10.885910034179688,
      "learning_rate": 1.9395999099075336e-05,
      "loss": 2.0574,
      "step": 480
    },
    {
      "epoch": 0.839041095890411,
      "grad_norm": 10.215595245361328,
      "learning_rate": 1.9243514829428517e-05,
      "loss": 2.0379,
      "step": 490
    },
    {
      "epoch": 0.8561643835616438,
      "grad_norm": 10.391200065612793,
      "learning_rate": 1.9091030559781702e-05,
      "loss": 1.9701,
      "step": 500
    },
    {
      "epoch": 0.8732876712328768,
      "grad_norm": 10.24344253540039,
      "learning_rate": 1.893854629013488e-05,
      "loss": 1.9256,
      "step": 510
    },
    {
      "epoch": 0.8904109589041096,
      "grad_norm": 11.228612899780273,
      "learning_rate": 1.8786062020488062e-05,
      "loss": 2.029,
      "step": 520
    },
    {
      "epoch": 0.9075342465753424,
      "grad_norm": 13.008833885192871,
      "learning_rate": 1.8633577750841244e-05,
      "loss": 1.9837,
      "step": 530
    },
    {
      "epoch": 0.9246575342465754,
      "grad_norm": 10.637398719787598,
      "learning_rate": 1.8481093481194426e-05,
      "loss": 1.8445,
      "step": 540
    },
    {
      "epoch": 0.9417808219178082,
      "grad_norm": 13.296565055847168,
      "learning_rate": 1.8328609211547607e-05,
      "loss": 1.9631,
      "step": 550
    },
    {
      "epoch": 0.958904109589041,
      "grad_norm": 11.811957359313965,
      "learning_rate": 1.817612494190079e-05,
      "loss": 1.961,
      "step": 560
    },
    {
      "epoch": 0.976027397260274,
      "grad_norm": 11.553853988647461,
      "learning_rate": 1.8023640672253967e-05,
      "loss": 1.9842,
      "step": 570
    },
    {
      "epoch": 0.9931506849315068,
      "grad_norm": 12.016478538513184,
      "learning_rate": 1.787115640260715e-05,
      "loss": 1.9183,
      "step": 580
    },
    {
      "epoch": 1.0,
      "eval_classification_report": {
        "accuracy": 0.273,
        "ar": {
          "f1-score": 0.22297297297297297,
          "precision": 0.36666666666666664,
          "recall": 0.16019417475728157,
          "support": 206.0
        },
        "cl": {
          "f1-score": 0.06876790830945559,
          "precision": 0.2033898305084746,
          "recall": 0.041379310344827586,
          "support": 290.0
        },
        "co": {
          "f1-score": 0.3118081180811808,
          "precision": 0.21311475409836064,
          "recall": 0.5807560137457045,
          "support": 291.0
        },
        "es": {
          "f1-score": 0.33212996389891697,
          "precision": 0.33454545454545453,
          "recall": 0.32974910394265233,
          "support": 279.0
        },
        "macro avg": {
          "f1-score": 0.2506678702739225,
          "precision": 0.29977219111270315,
          "recall": 0.2607198556236934,
          "support": 2000.0
        },
        "mx": {
          "f1-score": 0.26535626535626533,
          "precision": 0.46551724137931033,
          "recall": 0.18556701030927836,
          "support": 291.0
        },
        "pe": {
          "f1-score": 0.21484375,
          "precision": 0.248868778280543,
          "recall": 0.18900343642611683,
          "support": 291.0
        },
        "pr": {
          "f1-score": 0.574585635359116,
          "precision": 0.65,
          "recall": 0.5148514851485149,
          "support": 101.0
        },
        "uy": {
          "f1-score": 0.26554621848739496,
          "precision": 0.21584699453551912,
          "recall": 0.34497816593886466,
          "support": 229.0
        },
        "ve": {
          "f1-score": 0.0,
          "precision": 0.0,
          "recall": 0.0,
          "support": 22.0
        },
        "weighted avg": {
          "f1-score": 0.2539284929025767,
          "precision": 0.30641812645562344,
          "recall": 0.273,
          "support": 2000.0
        }
      },
      "eval_f1": 0.2506678702739225,
      "eval_loss": 1.8817169666290283,
      "eval_runtime": 2.8277,
      "eval_samples_per_second": 707.278,
      "eval_steps_per_second": 88.41,
      "step": 584
    },
    {
      "epoch": 1.0102739726027397,
      "grad_norm": 11.690625190734863,
      "learning_rate": 1.771867213296033e-05,
      "loss": 1.8131,
      "step": 590
    },
    {
      "epoch": 1.0273972602739727,
      "grad_norm": 12.48835563659668,
      "learning_rate": 1.7566187863313512e-05,
      "loss": 1.7617,
      "step": 600
    },
    {
      "epoch": 1.0445205479452055,
      "grad_norm": 15.49386978149414,
      "learning_rate": 1.7413703593666694e-05,
      "loss": 1.7815,
      "step": 610
    },
    {
      "epoch": 1.0616438356164384,
      "grad_norm": 17.888931274414062,
      "learning_rate": 1.7261219324019876e-05,
      "loss": 1.7337,
      "step": 620
    },
    {
      "epoch": 1.0787671232876712,
      "grad_norm": 15.020529747009277,
      "learning_rate": 1.7108735054373058e-05,
      "loss": 1.6467,
      "step": 630
    },
    {
      "epoch": 1.095890410958904,
      "grad_norm": 13.33208179473877,
      "learning_rate": 1.695625078472624e-05,
      "loss": 1.8363,
      "step": 640
    },
    {
      "epoch": 1.1130136986301369,
      "grad_norm": 18.24432373046875,
      "learning_rate": 1.680376651507942e-05,
      "loss": 1.9009,
      "step": 650
    },
    {
      "epoch": 1.13013698630137,
      "grad_norm": 13.63119125366211,
      "learning_rate": 1.6651282245432603e-05,
      "loss": 1.7742,
      "step": 660
    },
    {
      "epoch": 1.1472602739726028,
      "grad_norm": 15.010072708129883,
      "learning_rate": 1.6498797975785784e-05,
      "loss": 1.6258,
      "step": 670
    },
    {
      "epoch": 1.1643835616438356,
      "grad_norm": 11.895198822021484,
      "learning_rate": 1.6346313706138963e-05,
      "loss": 1.8729,
      "step": 680
    },
    {
      "epoch": 1.1815068493150684,
      "grad_norm": 14.77038288116455,
      "learning_rate": 1.6193829436492144e-05,
      "loss": 1.7128,
      "step": 690
    },
    {
      "epoch": 1.1986301369863013,
      "grad_norm": 11.579257011413574,
      "learning_rate": 1.6041345166845326e-05,
      "loss": 1.8186,
      "step": 700
    },
    {
      "epoch": 1.2157534246575343,
      "grad_norm": 13.447906494140625,
      "learning_rate": 1.5888860897198508e-05,
      "loss": 1.7262,
      "step": 710
    },
    {
      "epoch": 1.2328767123287672,
      "grad_norm": 16.56707763671875,
      "learning_rate": 1.573637662755169e-05,
      "loss": 1.8165,
      "step": 720
    },
    {
      "epoch": 1.25,
      "grad_norm": 12.793381690979004,
      "learning_rate": 1.558389235790487e-05,
      "loss": 1.7352,
      "step": 730
    },
    {
      "epoch": 1.2671232876712328,
      "grad_norm": 14.821900367736816,
      "learning_rate": 1.543140808825805e-05,
      "loss": 1.8093,
      "step": 740
    },
    {
      "epoch": 1.2842465753424657,
      "grad_norm": 13.674068450927734,
      "learning_rate": 1.527892381861123e-05,
      "loss": 1.7997,
      "step": 750
    },
    {
      "epoch": 1.3013698630136985,
      "grad_norm": 21.423152923583984,
      "learning_rate": 1.5126439548964413e-05,
      "loss": 1.8501,
      "step": 760
    },
    {
      "epoch": 1.3184931506849316,
      "grad_norm": 13.842451095581055,
      "learning_rate": 1.4973955279317594e-05,
      "loss": 1.6396,
      "step": 770
    },
    {
      "epoch": 1.3356164383561644,
      "grad_norm": 12.872172355651855,
      "learning_rate": 1.4821471009670778e-05,
      "loss": 1.7377,
      "step": 780
    },
    {
      "epoch": 1.3527397260273972,
      "grad_norm": 18.74566650390625,
      "learning_rate": 1.466898674002396e-05,
      "loss": 1.7576,
      "step": 790
    },
    {
      "epoch": 1.36986301369863,
      "grad_norm": 15.825615882873535,
      "learning_rate": 1.451650247037714e-05,
      "loss": 1.738,
      "step": 800
    },
    {
      "epoch": 1.3869863013698631,
      "grad_norm": 13.375591278076172,
      "learning_rate": 1.4364018200730321e-05,
      "loss": 1.6459,
      "step": 810
    },
    {
      "epoch": 1.404109589041096,
      "grad_norm": 17.51374053955078,
      "learning_rate": 1.4211533931083503e-05,
      "loss": 1.7292,
      "step": 820
    },
    {
      "epoch": 1.4212328767123288,
      "grad_norm": 15.895259857177734,
      "learning_rate": 1.4059049661436683e-05,
      "loss": 1.8173,
      "step": 830
    },
    {
      "epoch": 1.4383561643835616,
      "grad_norm": 24.653060913085938,
      "learning_rate": 1.3906565391789864e-05,
      "loss": 1.7303,
      "step": 840
    },
    {
      "epoch": 1.4554794520547945,
      "grad_norm": 19.205734252929688,
      "learning_rate": 1.3754081122143046e-05,
      "loss": 1.5581,
      "step": 850
    },
    {
      "epoch": 1.4726027397260273,
      "grad_norm": 13.14504337310791,
      "learning_rate": 1.3601596852496226e-05,
      "loss": 1.7028,
      "step": 860
    },
    {
      "epoch": 1.4897260273972603,
      "grad_norm": 19.409351348876953,
      "learning_rate": 1.3449112582849408e-05,
      "loss": 1.7864,
      "step": 870
    },
    {
      "epoch": 1.5068493150684932,
      "grad_norm": 20.153881072998047,
      "learning_rate": 1.329662831320259e-05,
      "loss": 1.5898,
      "step": 880
    },
    {
      "epoch": 1.523972602739726,
      "grad_norm": 13.584912300109863,
      "learning_rate": 1.314414404355577e-05,
      "loss": 1.6459,
      "step": 890
    },
    {
      "epoch": 1.541095890410959,
      "grad_norm": 20.62996482849121,
      "learning_rate": 1.2991659773908951e-05,
      "loss": 1.8207,
      "step": 900
    },
    {
      "epoch": 1.558219178082192,
      "grad_norm": 16.82609748840332,
      "learning_rate": 1.2839175504262133e-05,
      "loss": 1.6256,
      "step": 910
    },
    {
      "epoch": 1.5753424657534247,
      "grad_norm": 19.674978256225586,
      "learning_rate": 1.2686691234615316e-05,
      "loss": 1.6743,
      "step": 920
    },
    {
      "epoch": 1.5924657534246576,
      "grad_norm": 23.367334365844727,
      "learning_rate": 1.2534206964968498e-05,
      "loss": 1.602,
      "step": 930
    },
    {
      "epoch": 1.6095890410958904,
      "grad_norm": 19.46713638305664,
      "learning_rate": 1.2381722695321678e-05,
      "loss": 1.6916,
      "step": 940
    },
    {
      "epoch": 1.6267123287671232,
      "grad_norm": 24.35736083984375,
      "learning_rate": 1.222923842567486e-05,
      "loss": 1.6058,
      "step": 950
    },
    {
      "epoch": 1.643835616438356,
      "grad_norm": 24.921846389770508,
      "learning_rate": 1.2076754156028041e-05,
      "loss": 1.7183,
      "step": 960
    },
    {
      "epoch": 1.660958904109589,
      "grad_norm": 15.05435848236084,
      "learning_rate": 1.1924269886381221e-05,
      "loss": 1.6577,
      "step": 970
    },
    {
      "epoch": 1.678082191780822,
      "grad_norm": 15.788996696472168,
      "learning_rate": 1.1771785616734403e-05,
      "loss": 1.6803,
      "step": 980
    },
    {
      "epoch": 1.6952054794520548,
      "grad_norm": 18.309133529663086,
      "learning_rate": 1.1619301347087585e-05,
      "loss": 1.5949,
      "step": 990
    },
    {
      "epoch": 1.7123287671232876,
      "grad_norm": 20.53139877319336,
      "learning_rate": 1.1466817077440765e-05,
      "loss": 1.6097,
      "step": 1000
    },
    {
      "epoch": 1.7294520547945207,
      "grad_norm": 23.069534301757812,
      "learning_rate": 1.1314332807793946e-05,
      "loss": 1.6068,
      "step": 1010
    },
    {
      "epoch": 1.7465753424657535,
      "grad_norm": 22.394153594970703,
      "learning_rate": 1.1161848538147128e-05,
      "loss": 1.6641,
      "step": 1020
    },
    {
      "epoch": 1.7636986301369864,
      "grad_norm": 20.755950927734375,
      "learning_rate": 1.100936426850031e-05,
      "loss": 1.4966,
      "step": 1030
    },
    {
      "epoch": 1.7808219178082192,
      "grad_norm": 16.427976608276367,
      "learning_rate": 1.0856879998853491e-05,
      "loss": 1.7212,
      "step": 1040
    },
    {
      "epoch": 1.797945205479452,
      "grad_norm": 33.8179817199707,
      "learning_rate": 1.0704395729206671e-05,
      "loss": 1.7811,
      "step": 1050
    },
    {
      "epoch": 1.8150684931506849,
      "grad_norm": 20.979110717773438,
      "learning_rate": 1.0551911459559853e-05,
      "loss": 1.583,
      "step": 1060
    },
    {
      "epoch": 1.8321917808219177,
      "grad_norm": 17.085895538330078,
      "learning_rate": 1.0399427189913035e-05,
      "loss": 1.7014,
      "step": 1070
    },
    {
      "epoch": 1.8493150684931505,
      "grad_norm": 18.183605194091797,
      "learning_rate": 1.0246942920266215e-05,
      "loss": 1.5758,
      "step": 1080
    },
    {
      "epoch": 1.8664383561643836,
      "grad_norm": 16.887876510620117,
      "learning_rate": 1.0094458650619396e-05,
      "loss": 1.4666,
      "step": 1090
    },
    {
      "epoch": 1.8835616438356164,
      "grad_norm": 18.789949417114258,
      "learning_rate": 9.94197438097258e-06,
      "loss": 1.7645,
      "step": 1100
    },
    {
      "epoch": 1.9006849315068495,
      "grad_norm": 18.971622467041016,
      "learning_rate": 9.78949011132576e-06,
      "loss": 1.6875,
      "step": 1110
    },
    {
      "epoch": 1.9178082191780823,
      "grad_norm": 24.858366012573242,
      "learning_rate": 9.637005841678942e-06,
      "loss": 1.6846,
      "step": 1120
    },
    {
      "epoch": 1.9349315068493151,
      "grad_norm": 13.931944847106934,
      "learning_rate": 9.484521572032123e-06,
      "loss": 1.6025,
      "step": 1130
    },
    {
      "epoch": 1.952054794520548,
      "grad_norm": 17.908315658569336,
      "learning_rate": 9.332037302385303e-06,
      "loss": 1.4988,
      "step": 1140
    },
    {
      "epoch": 1.9691780821917808,
      "grad_norm": 16.93858528137207,
      "learning_rate": 9.179553032738485e-06,
      "loss": 1.7157,
      "step": 1150
    },
    {
      "epoch": 1.9863013698630136,
      "grad_norm": 19.648006439208984,
      "learning_rate": 9.027068763091667e-06,
      "loss": 1.5337,
      "step": 1160
    },
    {
      "epoch": 2.0,
      "eval_classification_report": {
        "accuracy": 0.3535,
        "ar": {
          "f1-score": 0.31309904153354634,
          "precision": 0.45794392523364486,
          "recall": 0.23786407766990292,
          "support": 206.0
        },
        "cl": {
          "f1-score": 0.23689320388349513,
          "precision": 0.27111111111111114,
          "recall": 0.2103448275862069,
          "support": 290.0
        },
        "co": {
          "f1-score": 0.36268068331143233,
          "precision": 0.2936170212765957,
          "recall": 0.4742268041237113,
          "support": 291.0
        },
        "es": {
          "f1-score": 0.38976377952755903,
          "precision": 0.43231441048034935,
          "recall": 0.3548387096774194,
          "support": 279.0
        },
        "macro avg": {
          "f1-score": 0.3353596555098448,
          "precision": 0.3519727641328481,
          "recall": 0.3389548790003148,
          "support": 2000.0
        },
        "mx": {
          "f1-score": 0.36254980079681276,
          "precision": 0.4312796208530806,
          "recall": 0.3127147766323024,
          "support": 291.0
        },
        "pe": {
          "f1-score": 0.31550068587105623,
          "precision": 0.2625570776255708,
          "recall": 0.3951890034364261,
          "support": 291.0
        },
        "pr": {
          "f1-score": 0.6543778801843319,
          "precision": 0.6120689655172413,
          "recall": 0.7029702970297029,
          "support": 101.0
        },
        "uy": {
          "f1-score": 0.3833718244803695,
          "precision": 0.4068627450980392,
          "recall": 0.3624454148471616,
          "support": 229.0
        },
        "ve": {
          "f1-score": 0.0,
          "precision": 0.0,
          "recall": 0.0,
          "support": 22.0
        },
        "weighted avg": {
          "f1-score": 0.34933930516945594,
          "precision": 0.3679569789689199,
          "recall": 0.3535,
          "support": 2000.0
        }
      },
      "eval_f1": 0.3353596555098448,
      "eval_loss": 1.793221116065979,
      "eval_runtime": 2.7925,
      "eval_samples_per_second": 716.208,
      "eval_steps_per_second": 89.526,
      "step": 1168
    },
    {
      "epoch": 2.0034246575342465,
      "grad_norm": 20.4486083984375,
      "learning_rate": 8.874584493444848e-06,
      "loss": 1.5645,
      "step": 1170
    },
    {
      "epoch": 2.0205479452054793,
      "grad_norm": 17.64771270751953,
      "learning_rate": 8.72210022379803e-06,
      "loss": 1.3974,
      "step": 1180
    },
    {
      "epoch": 2.037671232876712,
      "grad_norm": 13.944244384765625,
      "learning_rate": 8.56961595415121e-06,
      "loss": 1.2199,
      "step": 1190
    },
    {
      "epoch": 2.0547945205479454,
      "grad_norm": 16.506717681884766,
      "learning_rate": 8.417131684504392e-06,
      "loss": 1.2971,
      "step": 1200
    },
    {
      "epoch": 2.0719178082191783,
      "grad_norm": 22.669170379638672,
      "learning_rate": 8.264647414857573e-06,
      "loss": 1.2465,
      "step": 1210
    },
    {
      "epoch": 2.089041095890411,
      "grad_norm": 20.12689971923828,
      "learning_rate": 8.112163145210753e-06,
      "loss": 1.1901,
      "step": 1220
    },
    {
      "epoch": 2.106164383561644,
      "grad_norm": 16.720157623291016,
      "learning_rate": 7.959678875563935e-06,
      "loss": 1.1476,
      "step": 1230
    },
    {
      "epoch": 2.1232876712328768,
      "grad_norm": 13.676715850830078,
      "learning_rate": 7.807194605917117e-06,
      "loss": 1.2803,
      "step": 1240
    },
    {
      "epoch": 2.1404109589041096,
      "grad_norm": 16.257658004760742,
      "learning_rate": 7.654710336270298e-06,
      "loss": 1.148,
      "step": 1250
    },
    {
      "epoch": 2.1575342465753424,
      "grad_norm": 20.69976234436035,
      "learning_rate": 7.50222606662348e-06,
      "loss": 1.3063,
      "step": 1260
    },
    {
      "epoch": 2.1746575342465753,
      "grad_norm": 21.628828048706055,
      "learning_rate": 7.349741796976661e-06,
      "loss": 1.3074,
      "step": 1270
    },
    {
      "epoch": 2.191780821917808,
      "grad_norm": 18.611053466796875,
      "learning_rate": 7.197257527329842e-06,
      "loss": 1.2918,
      "step": 1280
    },
    {
      "epoch": 2.208904109589041,
      "grad_norm": 19.288227081298828,
      "learning_rate": 7.044773257683023e-06,
      "loss": 1.1346,
      "step": 1290
    },
    {
      "epoch": 2.2260273972602738,
      "grad_norm": 27.32809066772461,
      "learning_rate": 6.892288988036204e-06,
      "loss": 1.4171,
      "step": 1300
    },
    {
      "epoch": 2.243150684931507,
      "grad_norm": 26.971960067749023,
      "learning_rate": 6.739804718389387e-06,
      "loss": 1.0316,
      "step": 1310
    },
    {
      "epoch": 2.26027397260274,
      "grad_norm": 30.222131729125977,
      "learning_rate": 6.587320448742568e-06,
      "loss": 1.1692,
      "step": 1320
    },
    {
      "epoch": 2.2773972602739727,
      "grad_norm": 20.51207160949707,
      "learning_rate": 6.434836179095749e-06,
      "loss": 1.0899,
      "step": 1330
    },
    {
      "epoch": 2.2945205479452055,
      "grad_norm": 19.466644287109375,
      "learning_rate": 6.28235190944893e-06,
      "loss": 1.2698,
      "step": 1340
    },
    {
      "epoch": 2.3116438356164384,
      "grad_norm": 21.64017677307129,
      "learning_rate": 6.129867639802111e-06,
      "loss": 1.244,
      "step": 1350
    },
    {
      "epoch": 2.328767123287671,
      "grad_norm": 20.915922164916992,
      "learning_rate": 5.977383370155293e-06,
      "loss": 1.0774,
      "step": 1360
    },
    {
      "epoch": 2.345890410958904,
      "grad_norm": 24.89851188659668,
      "learning_rate": 5.8248991005084735e-06,
      "loss": 1.2141,
      "step": 1370
    },
    {
      "epoch": 2.363013698630137,
      "grad_norm": 24.925865173339844,
      "learning_rate": 5.672414830861656e-06,
      "loss": 1.2304,
      "step": 1380
    },
    {
      "epoch": 2.3801369863013697,
      "grad_norm": 27.173961639404297,
      "learning_rate": 5.519930561214836e-06,
      "loss": 1.1165,
      "step": 1390
    },
    {
      "epoch": 2.3972602739726026,
      "grad_norm": 19.15679931640625,
      "learning_rate": 5.367446291568018e-06,
      "loss": 1.3572,
      "step": 1400
    },
    {
      "epoch": 2.4143835616438354,
      "grad_norm": 20.516983032226562,
      "learning_rate": 5.214962021921199e-06,
      "loss": 1.2764,
      "step": 1410
    },
    {
      "epoch": 2.4315068493150687,
      "grad_norm": 19.259366989135742,
      "learning_rate": 5.06247775227438e-06,
      "loss": 1.2762,
      "step": 1420
    },
    {
      "epoch": 2.4486301369863015,
      "grad_norm": 25.636960983276367,
      "learning_rate": 4.909993482627562e-06,
      "loss": 1.1584,
      "step": 1430
    },
    {
      "epoch": 2.4657534246575343,
      "grad_norm": 17.860868453979492,
      "learning_rate": 4.757509212980744e-06,
      "loss": 0.9371,
      "step": 1440
    },
    {
      "epoch": 2.482876712328767,
      "grad_norm": 31.28371238708496,
      "learning_rate": 4.6050249433339244e-06,
      "loss": 1.1262,
      "step": 1450
    },
    {
      "epoch": 2.5,
      "grad_norm": 19.444276809692383,
      "learning_rate": 4.452540673687105e-06,
      "loss": 1.2705,
      "step": 1460
    },
    {
      "epoch": 2.517123287671233,
      "grad_norm": 16.713764190673828,
      "learning_rate": 4.300056404040287e-06,
      "loss": 1.1658,
      "step": 1470
    },
    {
      "epoch": 2.5342465753424657,
      "grad_norm": 24.59725570678711,
      "learning_rate": 4.147572134393469e-06,
      "loss": 1.303,
      "step": 1480
    },
    {
      "epoch": 2.5513698630136985,
      "grad_norm": 28.553564071655273,
      "learning_rate": 3.9950878647466495e-06,
      "loss": 1.2434,
      "step": 1490
    },
    {
      "epoch": 2.5684931506849313,
      "grad_norm": 25.321197509765625,
      "learning_rate": 3.84260359509983e-06,
      "loss": 1.2506,
      "step": 1500
    },
    {
      "epoch": 2.5856164383561646,
      "grad_norm": 29.10701560974121,
      "learning_rate": 3.6901193254530124e-06,
      "loss": 1.3008,
      "step": 1510
    },
    {
      "epoch": 2.602739726027397,
      "grad_norm": 23.615144729614258,
      "learning_rate": 3.5376350558061937e-06,
      "loss": 1.0486,
      "step": 1520
    },
    {
      "epoch": 2.6198630136986303,
      "grad_norm": 28.57093620300293,
      "learning_rate": 3.3851507861593745e-06,
      "loss": 1.2169,
      "step": 1530
    },
    {
      "epoch": 2.636986301369863,
      "grad_norm": 18.41623306274414,
      "learning_rate": 3.2326665165125566e-06,
      "loss": 1.2028,
      "step": 1540
    },
    {
      "epoch": 2.654109589041096,
      "grad_norm": 23.891878128051758,
      "learning_rate": 3.0801822468657375e-06,
      "loss": 1.2732,
      "step": 1550
    },
    {
      "epoch": 2.671232876712329,
      "grad_norm": 23.33771514892578,
      "learning_rate": 2.9276979772189187e-06,
      "loss": 1.2175,
      "step": 1560
    },
    {
      "epoch": 2.6883561643835616,
      "grad_norm": 17.881248474121094,
      "learning_rate": 2.7752137075721e-06,
      "loss": 1.2555,
      "step": 1570
    },
    {
      "epoch": 2.7054794520547945,
      "grad_norm": 19.803930282592773,
      "learning_rate": 2.6227294379252813e-06,
      "loss": 1.2051,
      "step": 1580
    },
    {
      "epoch": 2.7226027397260273,
      "grad_norm": 22.89762306213379,
      "learning_rate": 2.470245168278463e-06,
      "loss": 1.1713,
      "step": 1590
    },
    {
      "epoch": 2.73972602739726,
      "grad_norm": 16.94407844543457,
      "learning_rate": 2.3177608986316442e-06,
      "loss": 1.1175,
      "step": 1600
    },
    {
      "epoch": 2.756849315068493,
      "grad_norm": 23.772077560424805,
      "learning_rate": 2.1652766289848255e-06,
      "loss": 1.1266,
      "step": 1610
    },
    {
      "epoch": 2.7739726027397262,
      "grad_norm": 38.49664306640625,
      "learning_rate": 2.0127923593380067e-06,
      "loss": 1.3238,
      "step": 1620
    },
    {
      "epoch": 2.791095890410959,
      "grad_norm": 27.785099029541016,
      "learning_rate": 1.860308089691188e-06,
      "loss": 1.1356,
      "step": 1630
    },
    {
      "epoch": 2.808219178082192,
      "grad_norm": 34.19268798828125,
      "learning_rate": 1.7078238200443693e-06,
      "loss": 1.3087,
      "step": 1640
    },
    {
      "epoch": 2.8253424657534247,
      "grad_norm": 24.389307022094727,
      "learning_rate": 1.5553395503975505e-06,
      "loss": 1.3934,
      "step": 1650
    },
    {
      "epoch": 2.8424657534246576,
      "grad_norm": 41.80609130859375,
      "learning_rate": 1.402855280750732e-06,
      "loss": 1.2443,
      "step": 1660
    },
    {
      "epoch": 2.8595890410958904,
      "grad_norm": 25.80476188659668,
      "learning_rate": 1.2503710111039133e-06,
      "loss": 1.3135,
      "step": 1670
    },
    {
      "epoch": 2.8767123287671232,
      "grad_norm": 22.425390243530273,
      "learning_rate": 1.0978867414570945e-06,
      "loss": 1.0686,
      "step": 1680
    },
    {
      "epoch": 2.893835616438356,
      "grad_norm": 35.875511169433594,
      "learning_rate": 9.454024718102758e-07,
      "loss": 1.2933,
      "step": 1690
    },
    {
      "epoch": 2.910958904109589,
      "grad_norm": 43.623722076416016,
      "learning_rate": 7.929182021634573e-07,
      "loss": 1.2489,
      "step": 1700
    },
    {
      "epoch": 2.928082191780822,
      "grad_norm": 29.01580047607422,
      "learning_rate": 6.404339325166384e-07,
      "loss": 1.2077,
      "step": 1710
    },
    {
      "epoch": 2.9452054794520546,
      "grad_norm": 19.448177337646484,
      "learning_rate": 4.879496628698198e-07,
      "loss": 1.0898,
      "step": 1720
    },
    {
      "epoch": 2.962328767123288,
      "grad_norm": 29.721946716308594,
      "learning_rate": 3.354653932230011e-07,
      "loss": 1.1101,
      "step": 1730
    },
    {
      "epoch": 2.9794520547945207,
      "grad_norm": 23.694095611572266,
      "learning_rate": 1.8298112357618242e-07,
      "loss": 1.0485,
      "step": 1740
    },
    {
      "epoch": 2.9965753424657535,
      "grad_norm": 27.28858757019043,
      "learning_rate": 3.049685392936374e-08,
      "loss": 1.1386,
      "step": 1750
    },
    {
      "epoch": 3.0,
      "eval_classification_report": {
        "accuracy": 0.368,
        "ar": {
          "f1-score": 0.34759358288770054,
          "precision": 0.3869047619047619,
          "recall": 0.3155339805825243,
          "support": 206.0
        },
        "cl": {
          "f1-score": 0.23636363636363636,
          "precision": 0.25,
          "recall": 0.22413793103448276,
          "support": 290.0
        },
        "co": {
          "f1-score": 0.38351822503961963,
          "precision": 0.3558823529411765,
          "recall": 0.41580756013745707,
          "support": 291.0
        },
        "es": {
          "f1-score": 0.39919354838709675,
          "precision": 0.45622119815668205,
          "recall": 0.3548387096774194,
          "support": 279.0
        },
        "macro avg": {
          "f1-score": 0.3499479191731958,
          "precision": 0.3551406608650635,
          "recall": 0.3502956304842808,
          "support": 2000.0
        },
        "mx": {
          "f1-score": 0.38202247191011235,
          "precision": 0.35843373493975905,
          "recall": 0.40893470790378006,
          "support": 291.0
        },
        "pe": {
          "f1-score": 0.3446244477172312,
          "precision": 0.3015463917525773,
          "recall": 0.4020618556701031,
          "support": 291.0
        },
        "pr": {
          "f1-score": 0.6666666666666666,
          "precision": 0.6601941747572816,
          "recall": 0.6732673267326733,
          "support": 101.0
        },
        "uy": {
          "f1-score": 0.38954869358669836,
          "precision": 0.4270833333333333,
          "recall": 0.35807860262008734,
          "support": 229.0
        },
        "ve": {
          "f1-score": 0.0,
          "precision": 0.0,
          "recall": 0.0,
          "support": 22.0
        },
        "weighted avg": {
          "f1-score": 0.3655613869415472,
          "precision": 0.36979288589763315,
          "recall": 0.368,
          "support": 2000.0
        }
      },
      "eval_f1": 0.3499479191731958,
      "eval_loss": 1.8162137269973755,
      "eval_runtime": 2.8908,
      "eval_samples_per_second": 691.846,
      "eval_steps_per_second": 86.481,
      "step": 1752
    }
  ],
  "logging_steps": 10,
  "max_steps": 1752,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 230203751995008.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}