|
{ |
|
"best_metric": 0.29125073552131653, |
|
"best_model_checkpoint": "deit-cvc-drop-aug/checkpoint-1800", |
|
"epoch": 14.979591836734693, |
|
"eval_steps": 100, |
|
"global_step": 5505, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.991860321746833e-05, |
|
"loss": 0.5453, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.7695840775708089, |
|
"eval_f1": 0.7750933997509339, |
|
"eval_loss": 0.48243534564971924, |
|
"eval_precision": 0.7776111944027986, |
|
"eval_recall": 0.7725918570009931, |
|
"eval_runtime": 15.4841, |
|
"eval_samples_per_second": 253.098, |
|
"eval_steps_per_second": 15.823, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.967467788732156e-05, |
|
"loss": 0.4324, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.7685634090329165, |
|
"eval_f1": 0.7637405574368324, |
|
"eval_loss": 0.4796455502510071, |
|
"eval_precision": 0.8032876712328767, |
|
"eval_recall": 0.7279046673286991, |
|
"eval_runtime": 15.2008, |
|
"eval_samples_per_second": 257.815, |
|
"eval_steps_per_second": 16.118, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.926901819904178e-05, |
|
"loss": 0.4042, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.8321000255167135, |
|
"eval_f1": 0.8546819787985865, |
|
"eval_loss": 0.3790486454963684, |
|
"eval_precision": 0.7696897374701671, |
|
"eval_recall": 0.9607745779543198, |
|
"eval_runtime": 15.77, |
|
"eval_samples_per_second": 248.509, |
|
"eval_steps_per_second": 15.536, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.870294492836612e-05, |
|
"loss": 0.3849, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.8101556519520285, |
|
"eval_f1": 0.8161146811665843, |
|
"eval_loss": 0.4099680185317993, |
|
"eval_precision": 0.8125, |
|
"eval_recall": 0.8197616683217478, |
|
"eval_runtime": 13.9661, |
|
"eval_samples_per_second": 280.608, |
|
"eval_steps_per_second": 17.542, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.797830113701101e-05, |
|
"loss": 0.3621, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.8387343710130135, |
|
"eval_f1": 0.8510838831291235, |
|
"eval_loss": 0.36887437105178833, |
|
"eval_precision": 0.809865470852018, |
|
"eval_recall": 0.8967229394240318, |
|
"eval_runtime": 14.7142, |
|
"eval_samples_per_second": 266.341, |
|
"eval_steps_per_second": 16.651, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.709744617190038e-05, |
|
"loss": 0.3457, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.8458790507782598, |
|
"eval_f1": 0.8642086330935251, |
|
"eval_loss": 0.3312939703464508, |
|
"eval_precision": 0.7896466721446179, |
|
"eval_recall": 0.9543197616683218, |
|
"eval_runtime": 15.9456, |
|
"eval_samples_per_second": 245.773, |
|
"eval_steps_per_second": 15.365, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.606324798343567e-05, |
|
"loss": 0.3443, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.840520540954325, |
|
"eval_f1": 0.8599596683844947, |
|
"eval_loss": 0.3424055874347687, |
|
"eval_precision": 0.7835851367905268, |
|
"eval_recall": 0.9528301886792453, |
|
"eval_runtime": 15.7574, |
|
"eval_samples_per_second": 248.708, |
|
"eval_steps_per_second": 15.548, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.487907378781853e-05, |
|
"loss": 0.3287, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_accuracy": 0.8453687165093136, |
|
"eval_f1": 0.8560570071258907, |
|
"eval_loss": 0.33078569173812866, |
|
"eval_precision": 0.8205828779599271, |
|
"eval_recall": 0.8947368421052632, |
|
"eval_runtime": 14.597, |
|
"eval_samples_per_second": 268.479, |
|
"eval_steps_per_second": 16.784, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.35487791038282e-05, |
|
"loss": 0.3224, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.7655014034192396, |
|
"eval_f1": 0.7437970448843045, |
|
"eval_loss": 0.454572468996048, |
|
"eval_precision": 0.8480610298792117, |
|
"eval_recall": 0.6623634558093346, |
|
"eval_runtime": 15.6304, |
|
"eval_samples_per_second": 250.73, |
|
"eval_steps_per_second": 15.675, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.207669519974851e-05, |
|
"loss": 0.3096, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.8438377137024751, |
|
"eval_f1": 0.8521024649589173, |
|
"eval_loss": 0.34022626280784607, |
|
"eval_precision": 0.8300376647834274, |
|
"eval_recall": 0.8753723932472691, |
|
"eval_runtime": 15.4283, |
|
"eval_samples_per_second": 254.014, |
|
"eval_steps_per_second": 15.88, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.046761499131578e-05, |
|
"loss": 0.3095, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.8384792038785405, |
|
"eval_f1": 0.8524131499183958, |
|
"eval_loss": 0.36910393834114075, |
|
"eval_precision": 0.8035164835164835, |
|
"eval_recall": 0.9076464746772592, |
|
"eval_runtime": 15.4854, |
|
"eval_samples_per_second": 253.077, |
|
"eval_steps_per_second": 15.821, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 8.872677743660209e-05, |
|
"loss": 0.2901, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_accuracy": 0.8328655269201327, |
|
"eval_f1": 0.8467119120056167, |
|
"eval_loss": 0.36432939767837524, |
|
"eval_precision": 0.8007968127490039, |
|
"eval_recall": 0.8982125124131083, |
|
"eval_runtime": 14.6332, |
|
"eval_samples_per_second": 267.816, |
|
"eval_steps_per_second": 16.743, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 8.685985047864204e-05, |
|
"loss": 0.2939, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_accuracy": 0.8601684103087522, |
|
"eval_f1": 0.8760180995475113, |
|
"eval_loss": 0.30208107829093933, |
|
"eval_precision": 0.8046550290939318, |
|
"eval_recall": 0.9612711022840119, |
|
"eval_runtime": 14.6512, |
|
"eval_samples_per_second": 267.487, |
|
"eval_steps_per_second": 16.722, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 8.487291259133956e-05, |
|
"loss": 0.2946, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_accuracy": 0.8300586884409288, |
|
"eval_f1": 0.8342458934793429, |
|
"eval_loss": 0.36171799898147583, |
|
"eval_precision": 0.8363273453093812, |
|
"eval_recall": 0.8321747765640516, |
|
"eval_runtime": 15.3842, |
|
"eval_samples_per_second": 254.741, |
|
"eval_steps_per_second": 15.925, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 8.277243298873936e-05, |
|
"loss": 0.2856, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_accuracy": 0.8127073232967594, |
|
"eval_f1": 0.8116016427104723, |
|
"eval_loss": 0.48843976855278015, |
|
"eval_precision": 0.8400637619553666, |
|
"eval_recall": 0.7850049652432969, |
|
"eval_runtime": 14.4792, |
|
"eval_samples_per_second": 270.663, |
|
"eval_steps_per_second": 16.921, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 8.056525056209841e-05, |
|
"loss": 0.2683, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_accuracy": 0.834141362592498, |
|
"eval_f1": 0.8380667663178873, |
|
"eval_loss": 0.3540255129337311, |
|
"eval_precision": 0.841, |
|
"eval_recall": 0.8351539225422046, |
|
"eval_runtime": 15.1561, |
|
"eval_samples_per_second": 258.575, |
|
"eval_steps_per_second": 16.165, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.82585516133363e-05, |
|
"loss": 0.2724, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_accuracy": 0.8581270732329676, |
|
"eval_f1": 0.8664745437079732, |
|
"eval_loss": 0.30780917406082153, |
|
"eval_precision": 0.8390697674418605, |
|
"eval_recall": 0.8957298907646475, |
|
"eval_runtime": 14.446, |
|
"eval_samples_per_second": 271.286, |
|
"eval_steps_per_second": 16.96, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 7.585984645736096e-05, |
|
"loss": 0.2685, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.862720081653483, |
|
"eval_f1": 0.8703614457831326, |
|
"eval_loss": 0.29125073552131653, |
|
"eval_precision": 0.8455056179775281, |
|
"eval_recall": 0.8967229394240318, |
|
"eval_runtime": 15.5954, |
|
"eval_samples_per_second": 251.291, |
|
"eval_steps_per_second": 15.71, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 7.33769449694499e-05, |
|
"loss": 0.2449, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_accuracy": 0.8443480479714213, |
|
"eval_f1": 0.8490099009900989, |
|
"eval_loss": 0.38658004999160767, |
|
"eval_precision": 0.8464955577492597, |
|
"eval_recall": 0.8515392254220456, |
|
"eval_runtime": 16.0186, |
|
"eval_samples_per_second": 244.653, |
|
"eval_steps_per_second": 15.295, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 7.081793115730153e-05, |
|
"loss": 0.2468, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_accuracy": 0.8588925746363868, |
|
"eval_f1": 0.8670353450348642, |
|
"eval_loss": 0.30722111463546753, |
|
"eval_precision": 0.8405594405594405, |
|
"eval_recall": 0.8952333664349553, |
|
"eval_runtime": 15.614, |
|
"eval_samples_per_second": 250.993, |
|
"eval_steps_per_second": 15.691, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 6.819113684054634e-05, |
|
"loss": 0.2557, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"eval_accuracy": 0.8257208471548865, |
|
"eval_f1": 0.8232858990944373, |
|
"eval_loss": 0.3735339641571045, |
|
"eval_precision": 0.8595353862776878, |
|
"eval_recall": 0.7899702085402185, |
|
"eval_runtime": 15.378, |
|
"eval_samples_per_second": 254.845, |
|
"eval_steps_per_second": 15.932, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 6.550511452341459e-05, |
|
"loss": 0.25, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.8553202347537637, |
|
"eval_f1": 0.8561278863232682, |
|
"eval_loss": 0.3116500973701477, |
|
"eval_precision": 0.8754540736896731, |
|
"eval_recall": 0.8376365441906654, |
|
"eval_runtime": 15.151, |
|
"eval_samples_per_second": 258.663, |
|
"eval_steps_per_second": 16.171, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 6.276860954888322e-05, |
|
"loss": 0.2256, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_accuracy": 0.8573615718295483, |
|
"eval_f1": 0.8652687394552905, |
|
"eval_loss": 0.3264216184616089, |
|
"eval_precision": 0.8407494145199064, |
|
"eval_recall": 0.891261171797418, |
|
"eval_runtime": 13.8247, |
|
"eval_samples_per_second": 283.478, |
|
"eval_steps_per_second": 17.722, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 5.999053162496453e-05, |
|
"loss": 0.234, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_accuracy": 0.8295483541719827, |
|
"eval_f1": 0.8203335126412049, |
|
"eval_loss": 0.3617473244667053, |
|
"eval_precision": 0.8949530516431925, |
|
"eval_recall": 0.7571996027805362, |
|
"eval_runtime": 14.3944, |
|
"eval_samples_per_second": 272.259, |
|
"eval_steps_per_second": 17.02, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 5.717992581584373e-05, |
|
"loss": 0.2259, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_accuracy": 0.8609339117121715, |
|
"eval_f1": 0.8679428156045553, |
|
"eval_loss": 0.32836005091667175, |
|
"eval_precision": 0.8476100331282537, |
|
"eval_recall": 0.8892750744786495, |
|
"eval_runtime": 15.9578, |
|
"eval_samples_per_second": 245.585, |
|
"eval_steps_per_second": 15.353, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 5.434594309231389e-05, |
|
"loss": 0.2261, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_accuracy": 0.8443480479714213, |
|
"eval_f1": 0.8375066595631327, |
|
"eval_loss": 0.34862977266311646, |
|
"eval_precision": 0.903448275862069, |
|
"eval_recall": 0.7805362462760675, |
|
"eval_runtime": 14.7094, |
|
"eval_samples_per_second": 266.428, |
|
"eval_steps_per_second": 16.656, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 5.1497810537392844e-05, |
|
"loss": 0.2087, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"eval_accuracy": 0.836948201071702, |
|
"eval_f1": 0.8365310821181888, |
|
"eval_loss": 0.3970935046672821, |
|
"eval_precision": 0.862796833773087, |
|
"eval_recall": 0.8118172790466733, |
|
"eval_runtime": 15.0606, |
|
"eval_samples_per_second": 260.215, |
|
"eval_steps_per_second": 16.268, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 4.8644801304128374e-05, |
|
"loss": 0.2035, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"eval_accuracy": 0.878285276856341, |
|
"eval_f1": 0.8830595734248591, |
|
"eval_loss": 0.31056222319602966, |
|
"eval_precision": 0.8721549636803874, |
|
"eval_recall": 0.894240317775571, |
|
"eval_runtime": 15.0478, |
|
"eval_samples_per_second": 260.437, |
|
"eval_steps_per_second": 16.281, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 4.57962044234053e-05, |
|
"loss": 0.2116, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"eval_accuracy": 0.8451135493748405, |
|
"eval_f1": 0.8428682371214082, |
|
"eval_loss": 0.3734387159347534, |
|
"eval_precision": 0.8804759329367225, |
|
"eval_recall": 0.8083416087388282, |
|
"eval_runtime": 15.0966, |
|
"eval_samples_per_second": 259.594, |
|
"eval_steps_per_second": 16.229, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 4.2961294560056445e-05, |
|
"loss": 0.1956, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"eval_accuracy": 0.85914774177086, |
|
"eval_f1": 0.863298662704309, |
|
"eval_loss": 0.3442569077014923, |
|
"eval_precision": 0.8611660079051383, |
|
"eval_recall": 0.865441906653426, |
|
"eval_runtime": 15.4752, |
|
"eval_samples_per_second": 253.245, |
|
"eval_steps_per_second": 15.832, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 4.01493018157476e-05, |
|
"loss": 0.1826, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"eval_accuracy": 0.8423067108956367, |
|
"eval_f1": 0.8373684210526314, |
|
"eval_loss": 0.3795164227485657, |
|
"eval_precision": 0.8908174692049272, |
|
"eval_recall": 0.7899702085402185, |
|
"eval_runtime": 15.3206, |
|
"eval_samples_per_second": 255.8, |
|
"eval_steps_per_second": 15.992, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 3.7369381676954284e-05, |
|
"loss": 0.1918, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"eval_accuracy": 0.8581270732329676, |
|
"eval_f1": 0.8569222851260936, |
|
"eval_loss": 0.33620110154151917, |
|
"eval_precision": 0.8894230769230769, |
|
"eval_recall": 0.8267130089374379, |
|
"eval_runtime": 14.574, |
|
"eval_samples_per_second": 268.904, |
|
"eval_steps_per_second": 16.811, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 3.463058520587625e-05, |
|
"loss": 0.1886, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_accuracy": 0.8639959173258485, |
|
"eval_f1": 0.8692666176109884, |
|
"eval_loss": 0.3259018063545227, |
|
"eval_precision": 0.8589432864760058, |
|
"eval_recall": 0.8798411122144985, |
|
"eval_runtime": 14.5325, |
|
"eval_samples_per_second": 269.671, |
|
"eval_steps_per_second": 16.859, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.194182957134365e-05, |
|
"loss": 0.1716, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"eval_accuracy": 0.846389385047206, |
|
"eval_f1": 0.8481331987891019, |
|
"eval_loss": 0.4269343912601471, |
|
"eval_precision": 0.862051282051282, |
|
"eval_recall": 0.8346573982125124, |
|
"eval_runtime": 15.0706, |
|
"eval_samples_per_second": 260.043, |
|
"eval_steps_per_second": 16.257, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.9311869015663125e-05, |
|
"loss": 0.1654, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"eval_accuracy": 0.859658076039806, |
|
"eval_f1": 0.858974358974359, |
|
"eval_loss": 0.40663468837738037, |
|
"eval_precision": 0.8881230116648993, |
|
"eval_recall": 0.8316782522343595, |
|
"eval_runtime": 13.6261, |
|
"eval_samples_per_second": 287.609, |
|
"eval_steps_per_second": 17.98, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 2.674926635193134e-05, |
|
"loss": 0.1625, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_accuracy": 0.8512375606021945, |
|
"eval_f1": 0.8488462535649469, |
|
"eval_loss": 0.3926689624786377, |
|
"eval_precision": 0.8882257189365166, |
|
"eval_recall": 0.8128103277060575, |
|
"eval_runtime": 15.0764, |
|
"eval_samples_per_second": 259.943, |
|
"eval_steps_per_second": 16.251, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"learning_rate": 2.42623650846177e-05, |
|
"loss": 0.1659, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_accuracy": 0.8548099004848175, |
|
"eval_f1": 0.8529335745670716, |
|
"eval_loss": 0.37967267632484436, |
|
"eval_precision": 0.889487870619946, |
|
"eval_recall": 0.8192651439920556, |
|
"eval_runtime": 14.8402, |
|
"eval_samples_per_second": 264.08, |
|
"eval_steps_per_second": 16.509, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 2.1859262244187556e-05, |
|
"loss": 0.1519, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"eval_accuracy": 0.8512375606021945, |
|
"eval_f1": 0.8502440277421012, |
|
"eval_loss": 0.4088890254497528, |
|
"eval_precision": 0.8807876530069185, |
|
"eval_recall": 0.8217477656405164, |
|
"eval_runtime": 15.4244, |
|
"eval_samples_per_second": 254.079, |
|
"eval_steps_per_second": 15.884, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 1.9547782024213047e-05, |
|
"loss": 0.1484, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"eval_accuracy": 0.8545547333503445, |
|
"eval_f1": 0.8533950617283951, |
|
"eval_loss": 0.3864934742450714, |
|
"eval_precision": 0.8852721451440768, |
|
"eval_recall": 0.823733862959285, |
|
"eval_runtime": 14.9541, |
|
"eval_samples_per_second": 262.069, |
|
"eval_steps_per_second": 16.383, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 1.7335450306805827e-05, |
|
"loss": 0.1427, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"eval_accuracy": 0.846389385047206, |
|
"eval_f1": 0.8427377220480669, |
|
"eval_loss": 0.4346730411052704, |
|
"eval_precision": 0.8891951488423374, |
|
"eval_recall": 0.8008937437934459, |
|
"eval_runtime": 15.9395, |
|
"eval_samples_per_second": 245.867, |
|
"eval_steps_per_second": 15.371, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 1.522947015931348e-05, |
|
"loss": 0.1375, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"eval_accuracy": 0.8548099004848175, |
|
"eval_f1": 0.8532370389476398, |
|
"eval_loss": 0.4687642753124237, |
|
"eval_precision": 0.8878153515834675, |
|
"eval_recall": 0.8212512413108243, |
|
"eval_runtime": 14.957, |
|
"eval_samples_per_second": 262.017, |
|
"eval_steps_per_second": 16.38, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 1.3236698382059287e-05, |
|
"loss": 0.1276, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"eval_accuracy": 0.846899719316152, |
|
"eval_f1": 0.8426023084994753, |
|
"eval_loss": 0.4686568081378937, |
|
"eval_precision": 0.8932146829810901, |
|
"eval_recall": 0.7974180734856008, |
|
"eval_runtime": 13.9054, |
|
"eval_samples_per_second": 281.833, |
|
"eval_steps_per_second": 17.619, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 1.1363623183482775e-05, |
|
"loss": 0.1275, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"eval_accuracy": 0.8486858892574637, |
|
"eval_f1": 0.8447237496726892, |
|
"eval_loss": 0.4493071436882019, |
|
"eval_precision": 0.8936288088642659, |
|
"eval_recall": 0.8008937437934459, |
|
"eval_runtime": 14.5625, |
|
"eval_samples_per_second": 269.116, |
|
"eval_steps_per_second": 16.824, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 9.616343055368083e-06, |
|
"loss": 0.1349, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"eval_accuracy": 0.8423067108956367, |
|
"eval_f1": 0.8360742705570292, |
|
"eval_loss": 0.4618338346481323, |
|
"eval_precision": 0.89749430523918, |
|
"eval_recall": 0.7825223435948362, |
|
"eval_runtime": 14.9603, |
|
"eval_samples_per_second": 261.96, |
|
"eval_steps_per_second": 16.377, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 8.000546916939644e-06, |
|
"loss": 0.1217, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"eval_accuracy": 0.849706557795356, |
|
"eval_f1": 0.8450407787424362, |
|
"eval_loss": 0.4635533094406128, |
|
"eval_precision": 0.8987129266927812, |
|
"eval_recall": 0.7974180734856008, |
|
"eval_runtime": 15.9792, |
|
"eval_samples_per_second": 245.256, |
|
"eval_steps_per_second": 15.332, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 6.521495592473259e-06, |
|
"loss": 0.1211, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"eval_accuracy": 0.8555754018882368, |
|
"eval_f1": 0.855316973415133, |
|
"eval_loss": 0.45266029238700867, |
|
"eval_precision": 0.8814541622760801, |
|
"eval_recall": 0.8306852035749752, |
|
"eval_runtime": 15.4568, |
|
"eval_samples_per_second": 253.545, |
|
"eval_steps_per_second": 15.851, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 5.184004682729348e-06, |
|
"loss": 0.1164, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"eval_accuracy": 0.8545547333503445, |
|
"eval_f1": 0.851639770952629, |
|
"eval_loss": 0.46692270040512085, |
|
"eval_precision": 0.8949671772428884, |
|
"eval_recall": 0.8123138033763655, |
|
"eval_runtime": 16.339, |
|
"eval_samples_per_second": 239.856, |
|
"eval_steps_per_second": 14.995, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 3.992428885976652e-06, |
|
"loss": 0.1119, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"eval_accuracy": 0.8517478948711406, |
|
"eval_f1": 0.8495987574424021, |
|
"eval_loss": 0.461725115776062, |
|
"eval_precision": 0.8875067604110329, |
|
"eval_recall": 0.8147964250248262, |
|
"eval_runtime": 15.7126, |
|
"eval_samples_per_second": 249.418, |
|
"eval_steps_per_second": 15.593, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 2.9506478196551055e-06, |
|
"loss": 0.11, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_accuracy": 0.8507272263332483, |
|
"eval_f1": 0.848012470771629, |
|
"eval_loss": 0.4718102216720581, |
|
"eval_precision": 0.8893732970027248, |
|
"eval_recall": 0.8103277060575969, |
|
"eval_runtime": 15.1226, |
|
"eval_samples_per_second": 259.149, |
|
"eval_steps_per_second": 16.201, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 2.062053388840768e-06, |
|
"loss": 0.1138, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"eval_accuracy": 0.8479203878540444, |
|
"eval_f1": 0.8437336130047195, |
|
"eval_loss": 0.48920777440071106, |
|
"eval_precision": 0.8938888888888888, |
|
"eval_recall": 0.7989076464746773, |
|
"eval_runtime": 15.5429, |
|
"eval_samples_per_second": 252.14, |
|
"eval_steps_per_second": 15.763, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 1.329538742639358e-06, |
|
"loss": 0.1058, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"eval_accuracy": 0.849961724929829, |
|
"eval_f1": 0.8474312402698495, |
|
"eval_loss": 0.47247427701950073, |
|
"eval_precision": 0.8875, |
|
"eval_recall": 0.810824230387289, |
|
"eval_runtime": 16.0011, |
|
"eval_samples_per_second": 244.92, |
|
"eval_steps_per_second": 15.311, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 7.554888544652305e-07, |
|
"loss": 0.1042, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_accuracy": 0.849706557795356, |
|
"eval_f1": 0.8464946572843368, |
|
"eval_loss": 0.47878143191337585, |
|
"eval_precision": 0.8908392759188152, |
|
"eval_recall": 0.8063555114200596, |
|
"eval_runtime": 15.3441, |
|
"eval_samples_per_second": 255.408, |
|
"eval_steps_per_second": 15.967, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 3.4177275687476974e-07, |
|
"loss": 0.107, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_accuracy": 0.849706557795356, |
|
"eval_f1": 0.8466545170528509, |
|
"eval_loss": 0.4759483337402344, |
|
"eval_precision": 0.8899835796387521, |
|
"eval_recall": 0.8073485600794439, |
|
"eval_runtime": 15.151, |
|
"eval_samples_per_second": 258.663, |
|
"eval_steps_per_second": 16.171, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 8.973745623699903e-08, |
|
"loss": 0.1047, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"eval_accuracy": 0.8489410563919367, |
|
"eval_f1": 0.8458333333333334, |
|
"eval_loss": 0.4766782820224762, |
|
"eval_precision": 0.8893756845564075, |
|
"eval_recall": 0.8063555114200596, |
|
"eval_runtime": 15.3016, |
|
"eval_samples_per_second": 256.118, |
|
"eval_steps_per_second": 16.011, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 2.0354705417280351e-10, |
|
"loss": 0.1085, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.8489410563919367, |
|
"eval_f1": 0.8458333333333334, |
|
"eval_loss": 0.47687891125679016, |
|
"eval_precision": 0.8893756845564075, |
|
"eval_recall": 0.8063555114200596, |
|
"eval_runtime": 15.647, |
|
"eval_samples_per_second": 250.463, |
|
"eval_steps_per_second": 15.658, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"step": 5505, |
|
"total_flos": 2.046979800964418e+20, |
|
"train_loss": 0.2219778089443193, |
|
"train_runtime": 55065.7639, |
|
"train_samples_per_second": 9.607, |
|
"train_steps_per_second": 0.1 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_accuracy": 0.725, |
|
"eval_f1": 0.7441860465116279, |
|
"eval_loss": 0.5065506100654602, |
|
"eval_precision": 0.6956521739130435, |
|
"eval_recall": 0.8, |
|
"eval_runtime": 1.8887, |
|
"eval_samples_per_second": 21.179, |
|
"eval_steps_per_second": 1.588, |
|
"step": 5505 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5505, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 100, |
|
"total_flos": 2.046979800964418e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|