{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 90492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.18677596747875214, "learning_rate": 1.3320367751060821e-05, "loss": 1.6931, "step": 7541 }, { "epoch": 1.0, "eval_AFQMC_loss": 0.02504170872271061, "eval_AFQMC_runtime": 99.0724, "eval_AFQMC_samples_per_second": 43.564, "eval_AFQMC_steps_per_second": 0.343, "eval_emb_eval_pearson_cosine": 0.49435896960848713, "eval_emb_eval_spearman_cosine": 0.5280909639875639, "step": 7541 }, { "epoch": 1.0, "eval_ATEC_loss": 0.01953265070915222, "eval_ATEC_runtime": 6.6017, "eval_ATEC_samples_per_second": 3029.521, "eval_ATEC_steps_per_second": 23.782, "step": 7541 }, { "epoch": 1.0, "eval_BQ_loss": 0.01803545467555523, "eval_BQ_runtime": 3.4373, "eval_BQ_samples_per_second": 2909.235, "eval_BQ_steps_per_second": 22.983, "step": 7541 }, { "epoch": 1.0, "eval_Cmnli_loss": 0.020238544791936874, "eval_Cmnli_runtime": 3.9735, "eval_Cmnli_samples_per_second": 2092.612, "eval_Cmnli_steps_per_second": 16.358, "step": 7541 }, { "epoch": 1.0, "eval_LCQMC_loss": 0.03518352285027504, "eval_LCQMC_runtime": 2.3275, "eval_LCQMC_samples_per_second": 3781.698, "eval_LCQMC_steps_per_second": 29.645, "step": 7541 }, { "epoch": 1.0, "eval_Ocnli_loss": 0.025195566937327385, "eval_Ocnli_runtime": 0.6266, "eval_Ocnli_samples_per_second": 2947.795, "eval_Ocnli_steps_per_second": 23.94, "step": 7541 }, { "epoch": 1.0, "eval_PAWSX_loss": 0.067341148853302, "eval_PAWSX_runtime": 0.9538, "eval_PAWSX_samples_per_second": 2096.982, "eval_PAWSX_steps_per_second": 16.776, "step": 7541 }, { "epoch": 1.0, "eval_QBQTC_loss": 8.280766487121582, "eval_QBQTC_runtime": 7.6679, "eval_QBQTC_samples_per_second": 2608.269, "eval_QBQTC_steps_per_second": 20.475, "step": 7541 }, { "epoch": 1.0, "eval_STS-B_loss": 8.6417875289917, "eval_STS-B_runtime": 0.5653, "eval_STS-B_samples_per_second": 2579.365, "eval_STS-B_steps_per_second": 21.229, "step": 7541 }, { "epoch": 2.0, "grad_norm": 0.29912641644477844, "learning_rate": 1.92609255130006e-05, "loss": 1.5717, "step": 15082 }, { "epoch": 2.0, "eval_AFQMC_loss": 0.023414788767695427, "eval_AFQMC_runtime": 99.0838, "eval_AFQMC_samples_per_second": 43.559, "eval_AFQMC_steps_per_second": 0.343, "eval_emb_eval_pearson_cosine": 0.541482089845575, "eval_emb_eval_spearman_cosine": 0.584542245914602, "step": 15082 }, { "epoch": 2.0, "eval_ATEC_loss": 0.01733938232064247, "eval_ATEC_runtime": 6.5234, "eval_ATEC_samples_per_second": 3065.879, "eval_ATEC_steps_per_second": 24.067, "step": 15082 }, { "epoch": 2.0, "eval_BQ_loss": 0.015580276027321815, "eval_BQ_runtime": 3.419, "eval_BQ_samples_per_second": 2924.795, "eval_BQ_steps_per_second": 23.106, "step": 15082 }, { "epoch": 2.0, "eval_Cmnli_loss": 0.016321830451488495, "eval_Cmnli_runtime": 3.7867, "eval_Cmnli_samples_per_second": 2195.824, "eval_Cmnli_steps_per_second": 17.165, "step": 15082 }, { "epoch": 2.0, "eval_LCQMC_loss": 0.02696206048130989, "eval_LCQMC_runtime": 2.3672, "eval_LCQMC_samples_per_second": 3718.264, "eval_LCQMC_steps_per_second": 29.148, "step": 15082 }, { "epoch": 2.0, "eval_Ocnli_loss": 0.018722666427493095, "eval_Ocnli_runtime": 0.6105, "eval_Ocnli_samples_per_second": 3025.526, "eval_Ocnli_steps_per_second": 24.571, "step": 15082 }, { "epoch": 2.0, "eval_PAWSX_loss": 0.06007164344191551, "eval_PAWSX_runtime": 0.9605, "eval_PAWSX_samples_per_second": 2082.25, "eval_PAWSX_steps_per_second": 16.658, "step": 15082 }, { "epoch": 2.0, "eval_QBQTC_loss": 8.381814002990723, "eval_QBQTC_runtime": 7.644, "eval_QBQTC_samples_per_second": 2616.427, "eval_QBQTC_steps_per_second": 20.539, "step": 15082 }, { "epoch": 2.0, "eval_STS-B_loss": 9.617568969726562, "eval_STS-B_runtime": 0.5589, "eval_STS-B_samples_per_second": 2608.657, "eval_STS-B_steps_per_second": 21.47, "step": 15082 }, { "epoch": 3.0, "grad_norm": 0.2338990569114685, "learning_rate": 1.7779829671031307e-05, "loss": 1.5301, "step": 22623 }, { "epoch": 3.0, "eval_AFQMC_loss": 0.02241475135087967, "eval_AFQMC_runtime": 99.9714, "eval_AFQMC_samples_per_second": 43.172, "eval_AFQMC_steps_per_second": 0.34, "eval_emb_eval_pearson_cosine": 0.5913082581080403, "eval_emb_eval_spearman_cosine": 0.6383284889220768, "step": 22623 }, { "epoch": 3.0, "eval_ATEC_loss": 0.016517719253897667, "eval_ATEC_runtime": 6.5842, "eval_ATEC_samples_per_second": 3037.594, "eval_ATEC_steps_per_second": 23.845, "step": 22623 }, { "epoch": 3.0, "eval_BQ_loss": 0.014284521341323853, "eval_BQ_runtime": 3.406, "eval_BQ_samples_per_second": 2936.005, "eval_BQ_steps_per_second": 23.194, "step": 22623 }, { "epoch": 3.0, "eval_Cmnli_loss": 0.01529396791011095, "eval_Cmnli_runtime": 3.7388, "eval_Cmnli_samples_per_second": 2223.987, "eval_Cmnli_steps_per_second": 17.385, "step": 22623 }, { "epoch": 3.0, "eval_LCQMC_loss": 0.020633986219763756, "eval_LCQMC_runtime": 2.3364, "eval_LCQMC_samples_per_second": 3767.314, "eval_LCQMC_steps_per_second": 29.532, "step": 22623 }, { "epoch": 3.0, "eval_Ocnli_loss": 0.016242269426584244, "eval_Ocnli_runtime": 0.6209, "eval_Ocnli_samples_per_second": 2974.536, "eval_Ocnli_steps_per_second": 24.157, "step": 22623 }, { "epoch": 3.0, "eval_PAWSX_loss": 0.05045855790376663, "eval_PAWSX_runtime": 0.9593, "eval_PAWSX_samples_per_second": 2084.846, "eval_PAWSX_steps_per_second": 16.679, "step": 22623 }, { "epoch": 3.0, "eval_QBQTC_loss": 8.840312957763672, "eval_QBQTC_runtime": 7.6443, "eval_QBQTC_samples_per_second": 2616.334, "eval_QBQTC_steps_per_second": 20.538, "step": 22623 }, { "epoch": 3.0, "eval_STS-B_loss": 9.837035179138184, "eval_STS-B_runtime": 0.5658, "eval_STS-B_samples_per_second": 2577.011, "eval_STS-B_steps_per_second": 21.21, "step": 22623 }, { "epoch": 4.0, "grad_norm": 0.1766250729560852, "learning_rate": 1.629912674479141e-05, "loss": 1.4768, "step": 30164 }, { "epoch": 4.0, "eval_AFQMC_loss": 0.021405402570962906, "eval_AFQMC_runtime": 100.3018, "eval_AFQMC_samples_per_second": 43.03, "eval_AFQMC_steps_per_second": 0.339, "eval_emb_eval_pearson_cosine": 0.6125367634857398, "eval_emb_eval_spearman_cosine": 0.6570670709310811, "step": 30164 }, { "epoch": 4.0, "eval_ATEC_loss": 0.015135602094233036, "eval_ATEC_runtime": 6.5473, "eval_ATEC_samples_per_second": 3054.678, "eval_ATEC_steps_per_second": 23.979, "step": 30164 }, { "epoch": 4.0, "eval_BQ_loss": 0.013757712207734585, "eval_BQ_runtime": 3.4539, "eval_BQ_samples_per_second": 2895.245, "eval_BQ_steps_per_second": 22.872, "step": 30164 }, { "epoch": 4.0, "eval_Cmnli_loss": 0.015052303671836853, "eval_Cmnli_runtime": 3.7534, "eval_Cmnli_samples_per_second": 2215.324, "eval_Cmnli_steps_per_second": 17.318, "step": 30164 }, { "epoch": 4.0, "eval_LCQMC_loss": 0.019115839153528214, "eval_LCQMC_runtime": 2.3344, "eval_LCQMC_samples_per_second": 3770.582, "eval_LCQMC_steps_per_second": 29.558, "step": 30164 }, { "epoch": 4.0, "eval_Ocnli_loss": 0.015967663377523422, "eval_Ocnli_runtime": 0.603, "eval_Ocnli_samples_per_second": 3063.145, "eval_Ocnli_steps_per_second": 24.877, "step": 30164 }, { "epoch": 4.0, "eval_PAWSX_loss": 0.04721539840102196, "eval_PAWSX_runtime": 0.9557, "eval_PAWSX_samples_per_second": 2092.706, "eval_PAWSX_steps_per_second": 16.742, "step": 30164 }, { "epoch": 4.0, "eval_QBQTC_loss": 9.168432235717773, "eval_QBQTC_runtime": 7.6741, "eval_QBQTC_samples_per_second": 2606.178, "eval_QBQTC_steps_per_second": 20.458, "step": 30164 }, { "epoch": 4.0, "eval_STS-B_loss": 9.83575439453125, "eval_STS-B_runtime": 0.5632, "eval_STS-B_samples_per_second": 2588.593, "eval_STS-B_steps_per_second": 21.305, "step": 30164 }, { "epoch": 5.0, "grad_norm": 0.13791993260383606, "learning_rate": 1.481822736068682e-05, "loss": 1.4141, "step": 37705 }, { "epoch": 5.0, "eval_AFQMC_loss": 0.0204468946903944, "eval_AFQMC_runtime": 99.1315, "eval_AFQMC_samples_per_second": 43.538, "eval_AFQMC_steps_per_second": 0.343, "eval_emb_eval_pearson_cosine": 0.6315631932372178, "eval_emb_eval_spearman_cosine": 0.6690982617504025, "step": 37705 }, { "epoch": 5.0, "eval_ATEC_loss": 0.014196612872183323, "eval_ATEC_runtime": 6.5521, "eval_ATEC_samples_per_second": 3052.435, "eval_ATEC_steps_per_second": 23.962, "step": 37705 }, { "epoch": 5.0, "eval_BQ_loss": 0.013593867421150208, "eval_BQ_runtime": 3.4128, "eval_BQ_samples_per_second": 2930.177, "eval_BQ_steps_per_second": 23.148, "step": 37705 }, { "epoch": 5.0, "eval_Cmnli_loss": 0.015099190175533295, "eval_Cmnli_runtime": 3.8007, "eval_Cmnli_samples_per_second": 2187.75, "eval_Cmnli_steps_per_second": 17.102, "step": 37705 }, { "epoch": 5.0, "eval_LCQMC_loss": 0.017175855115056038, "eval_LCQMC_runtime": 2.4095, "eval_LCQMC_samples_per_second": 3653.013, "eval_LCQMC_steps_per_second": 28.636, "step": 37705 }, { "epoch": 5.0, "eval_Ocnli_loss": 0.015738315880298615, "eval_Ocnli_runtime": 0.619, "eval_Ocnli_samples_per_second": 2983.626, "eval_Ocnli_steps_per_second": 24.231, "step": 37705 }, { "epoch": 5.0, "eval_PAWSX_loss": 0.046284567564725876, "eval_PAWSX_runtime": 0.959, "eval_PAWSX_samples_per_second": 2085.505, "eval_PAWSX_steps_per_second": 16.684, "step": 37705 }, { "epoch": 5.0, "eval_QBQTC_loss": 9.86181640625, "eval_QBQTC_runtime": 7.9334, "eval_QBQTC_samples_per_second": 2520.974, "eval_QBQTC_steps_per_second": 19.79, "step": 37705 }, { "epoch": 5.0, "eval_STS-B_loss": 9.911561965942383, "eval_STS-B_runtime": 0.5701, "eval_STS-B_samples_per_second": 2557.501, "eval_STS-B_steps_per_second": 21.049, "step": 37705 }, { "epoch": 6.0, "grad_norm": 0.17065556347370148, "learning_rate": 1.3337327976582224e-05, "loss": 1.3461, "step": 45246 }, { "epoch": 6.0, "eval_AFQMC_loss": 0.02002647891640663, "eval_AFQMC_runtime": 98.4869, "eval_AFQMC_samples_per_second": 43.823, "eval_AFQMC_steps_per_second": 0.345, "eval_emb_eval_pearson_cosine": 0.6436085641934666, "eval_emb_eval_spearman_cosine": 0.6715343325157711, "step": 45246 }, { "epoch": 6.0, "eval_ATEC_loss": 0.014070386998355389, "eval_ATEC_runtime": 6.5822, "eval_ATEC_samples_per_second": 3038.48, "eval_ATEC_steps_per_second": 23.852, "step": 45246 }, { "epoch": 6.0, "eval_BQ_loss": 0.013585967943072319, "eval_BQ_runtime": 3.6468, "eval_BQ_samples_per_second": 2742.113, "eval_BQ_steps_per_second": 21.663, "step": 45246 }, { "epoch": 6.0, "eval_Cmnli_loss": 0.01478442084044218, "eval_Cmnli_runtime": 3.7572, "eval_Cmnli_samples_per_second": 2213.061, "eval_Cmnli_steps_per_second": 17.3, "step": 45246 }, { "epoch": 6.0, "eval_LCQMC_loss": 0.016025548800826073, "eval_LCQMC_runtime": 2.3232, "eval_LCQMC_samples_per_second": 3788.767, "eval_LCQMC_steps_per_second": 29.701, "step": 45246 }, { "epoch": 6.0, "eval_Ocnli_loss": 0.015464934520423412, "eval_Ocnli_runtime": 0.6071, "eval_Ocnli_samples_per_second": 3042.383, "eval_Ocnli_steps_per_second": 24.708, "step": 45246 }, { "epoch": 6.0, "eval_PAWSX_loss": 0.04284413903951645, "eval_PAWSX_runtime": 0.9891, "eval_PAWSX_samples_per_second": 2021.991, "eval_PAWSX_steps_per_second": 16.176, "step": 45246 }, { "epoch": 6.0, "eval_QBQTC_loss": 10.840497016906738, "eval_QBQTC_runtime": 7.7281, "eval_QBQTC_samples_per_second": 2587.958, "eval_QBQTC_steps_per_second": 20.315, "step": 45246 }, { "epoch": 6.0, "eval_STS-B_loss": 9.943366050720215, "eval_STS-B_runtime": 0.5591, "eval_STS-B_samples_per_second": 2607.892, "eval_STS-B_steps_per_second": 21.464, "step": 45246 }, { "epoch": 7.0, "grad_norm": 0.13601745665073395, "learning_rate": 1.185642859247763e-05, "loss": 1.2856, "step": 52787 }, { "epoch": 7.0, "eval_AFQMC_loss": 0.019535699859261513, "eval_AFQMC_runtime": 99.2494, "eval_AFQMC_samples_per_second": 43.486, "eval_AFQMC_steps_per_second": 0.343, "eval_emb_eval_pearson_cosine": 0.6473204770905479, "eval_emb_eval_spearman_cosine": 0.6723246242283224, "step": 52787 }, { "epoch": 7.0, "eval_ATEC_loss": 0.013532574288547039, "eval_ATEC_runtime": 6.5765, "eval_ATEC_samples_per_second": 3041.133, "eval_ATEC_steps_per_second": 23.873, "step": 52787 }, { "epoch": 7.0, "eval_BQ_loss": 0.013674370013177395, "eval_BQ_runtime": 3.4566, "eval_BQ_samples_per_second": 2892.978, "eval_BQ_steps_per_second": 22.855, "step": 52787 }, { "epoch": 7.0, "eval_Cmnli_loss": 0.014647725969552994, "eval_Cmnli_runtime": 3.7546, "eval_Cmnli_samples_per_second": 2214.61, "eval_Cmnli_steps_per_second": 17.312, "step": 52787 }, { "epoch": 7.0, "eval_LCQMC_loss": 0.0150056267157197, "eval_LCQMC_runtime": 2.3629, "eval_LCQMC_samples_per_second": 3725.146, "eval_LCQMC_steps_per_second": 29.202, "step": 52787 }, { "epoch": 7.0, "eval_Ocnli_loss": 0.015559999272227287, "eval_Ocnli_runtime": 0.6231, "eval_Ocnli_samples_per_second": 2964.212, "eval_Ocnli_steps_per_second": 24.073, "step": 52787 }, { "epoch": 7.0, "eval_PAWSX_loss": 0.04360537603497505, "eval_PAWSX_runtime": 0.9535, "eval_PAWSX_samples_per_second": 2097.49, "eval_PAWSX_steps_per_second": 16.78, "step": 52787 }, { "epoch": 7.0, "eval_QBQTC_loss": 11.554633140563965, "eval_QBQTC_runtime": 7.7436, "eval_QBQTC_samples_per_second": 2582.791, "eval_QBQTC_steps_per_second": 20.275, "step": 52787 }, { "epoch": 7.0, "eval_STS-B_loss": 10.235774993896484, "eval_STS-B_runtime": 0.5543, "eval_STS-B_samples_per_second": 2630.224, "eval_STS-B_steps_per_second": 21.648, "step": 52787 }, { "epoch": 8.0, "grad_norm": 0.11353704333305359, "learning_rate": 1.0375332750508335e-05, "loss": 1.2182, "step": 60328 }, { "epoch": 8.0, "eval_AFQMC_loss": 0.019351305440068245, "eval_AFQMC_runtime": 99.553, "eval_AFQMC_samples_per_second": 43.354, "eval_AFQMC_steps_per_second": 0.342, "eval_emb_eval_pearson_cosine": 0.6494222678001456, "eval_emb_eval_spearman_cosine": 0.6703225826686093, "step": 60328 }, { "epoch": 8.0, "eval_ATEC_loss": 0.013181576505303383, "eval_ATEC_runtime": 6.522, "eval_ATEC_samples_per_second": 3066.526, "eval_ATEC_steps_per_second": 24.072, "step": 60328 }, { "epoch": 8.0, "eval_BQ_loss": 0.013695988804101944, "eval_BQ_runtime": 3.4134, "eval_BQ_samples_per_second": 2929.642, "eval_BQ_steps_per_second": 23.144, "step": 60328 }, { "epoch": 8.0, "eval_Cmnli_loss": 0.014423331245779991, "eval_Cmnli_runtime": 3.7623, "eval_Cmnli_samples_per_second": 2210.105, "eval_Cmnli_steps_per_second": 17.277, "step": 60328 }, { "epoch": 8.0, "eval_LCQMC_loss": 0.015052268281579018, "eval_LCQMC_runtime": 2.3257, "eval_LCQMC_samples_per_second": 3784.705, "eval_LCQMC_steps_per_second": 29.669, "step": 60328 }, { "epoch": 8.0, "eval_Ocnli_loss": 0.015219747088849545, "eval_Ocnli_runtime": 0.6121, "eval_Ocnli_samples_per_second": 3017.401, "eval_Ocnli_steps_per_second": 24.505, "step": 60328 }, { "epoch": 8.0, "eval_PAWSX_loss": 0.042021822184324265, "eval_PAWSX_runtime": 0.9526, "eval_PAWSX_samples_per_second": 2099.597, "eval_PAWSX_steps_per_second": 16.797, "step": 60328 }, { "epoch": 8.0, "eval_QBQTC_loss": 12.265329360961914, "eval_QBQTC_runtime": 7.6635, "eval_QBQTC_samples_per_second": 2609.768, "eval_QBQTC_steps_per_second": 20.487, "step": 60328 }, { "epoch": 8.0, "eval_STS-B_loss": 10.596503257751465, "eval_STS-B_runtime": 0.5656, "eval_STS-B_samples_per_second": 2577.705, "eval_STS-B_steps_per_second": 21.216, "step": 60328 }, { "epoch": 9.0, "grad_norm": 0.10075096040964127, "learning_rate": 8.89443336640374e-06, "loss": 1.1546, "step": 67869 }, { "epoch": 9.0, "eval_AFQMC_loss": 0.018916338682174683, "eval_AFQMC_runtime": 99.1097, "eval_AFQMC_samples_per_second": 43.548, "eval_AFQMC_steps_per_second": 0.343, "eval_emb_eval_pearson_cosine": 0.6461004194477242, "eval_emb_eval_spearman_cosine": 0.6620616757187626, "step": 67869 }, { "epoch": 9.0, "eval_ATEC_loss": 0.01316931750625372, "eval_ATEC_runtime": 6.5342, "eval_ATEC_samples_per_second": 3060.808, "eval_ATEC_steps_per_second": 24.027, "step": 67869 }, { "epoch": 9.0, "eval_BQ_loss": 0.013916433788836002, "eval_BQ_runtime": 3.4228, "eval_BQ_samples_per_second": 2921.626, "eval_BQ_steps_per_second": 23.081, "step": 67869 }, { "epoch": 9.0, "eval_Cmnli_loss": 0.014616083353757858, "eval_Cmnli_runtime": 3.7166, "eval_Cmnli_samples_per_second": 2237.257, "eval_Cmnli_steps_per_second": 17.489, "step": 67869 }, { "epoch": 9.0, "eval_LCQMC_loss": 0.014678360894322395, "eval_LCQMC_runtime": 2.3376, "eval_LCQMC_samples_per_second": 3765.326, "eval_LCQMC_steps_per_second": 29.517, "step": 67869 }, { "epoch": 9.0, "eval_Ocnli_loss": 0.015178242698311806, "eval_Ocnli_runtime": 0.6061, "eval_Ocnli_samples_per_second": 3047.588, "eval_Ocnli_steps_per_second": 24.75, "step": 67869 }, { "epoch": 9.0, "eval_PAWSX_loss": 0.04116755351424217, "eval_PAWSX_runtime": 0.9482, "eval_PAWSX_samples_per_second": 2109.261, "eval_PAWSX_steps_per_second": 16.874, "step": 67869 }, { "epoch": 9.0, "eval_QBQTC_loss": 12.928996086120605, "eval_QBQTC_runtime": 7.7135, "eval_QBQTC_samples_per_second": 2592.855, "eval_QBQTC_steps_per_second": 20.354, "step": 67869 }, { "epoch": 9.0, "eval_STS-B_loss": 10.769329071044922, "eval_STS-B_runtime": 0.557, "eval_STS-B_samples_per_second": 2617.49, "eval_STS-B_steps_per_second": 21.543, "step": 67869 }, { "epoch": 10.0, "grad_norm": 116.47360229492188, "learning_rate": 7.413533982299147e-06, "loss": 1.091, "step": 75410 }, { "epoch": 10.0, "eval_AFQMC_loss": 0.01876525580883026, "eval_AFQMC_runtime": 106.9735, "eval_AFQMC_samples_per_second": 40.346, "eval_AFQMC_steps_per_second": 0.318, "eval_emb_eval_pearson_cosine": 0.6425398472325708, "eval_emb_eval_spearman_cosine": 0.6571917650236837, "step": 75410 }, { "epoch": 10.0, "eval_ATEC_loss": 0.012936480343341827, "eval_ATEC_runtime": 6.5457, "eval_ATEC_samples_per_second": 3055.455, "eval_ATEC_steps_per_second": 23.985, "step": 75410 }, { "epoch": 10.0, "eval_BQ_loss": 0.014092645607888699, "eval_BQ_runtime": 3.4559, "eval_BQ_samples_per_second": 2893.566, "eval_BQ_steps_per_second": 22.859, "step": 75410 }, { "epoch": 10.0, "eval_Cmnli_loss": 0.01457986794412136, "eval_Cmnli_runtime": 3.7514, "eval_Cmnli_samples_per_second": 2216.51, "eval_Cmnli_steps_per_second": 17.327, "step": 75410 }, { "epoch": 10.0, "eval_LCQMC_loss": 0.014272717759013176, "eval_LCQMC_runtime": 2.3604, "eval_LCQMC_samples_per_second": 3729.02, "eval_LCQMC_steps_per_second": 29.232, "step": 75410 }, { "epoch": 10.0, "eval_Ocnli_loss": 0.015037407167255878, "eval_Ocnli_runtime": 0.6116, "eval_Ocnli_samples_per_second": 3020.096, "eval_Ocnli_steps_per_second": 24.527, "step": 75410 }, { "epoch": 10.0, "eval_PAWSX_loss": 0.04113338142633438, "eval_PAWSX_runtime": 0.9594, "eval_PAWSX_samples_per_second": 2084.624, "eval_PAWSX_steps_per_second": 16.677, "step": 75410 }, { "epoch": 10.0, "eval_QBQTC_loss": 13.890397071838379, "eval_QBQTC_runtime": 7.674, "eval_QBQTC_samples_per_second": 2606.201, "eval_QBQTC_steps_per_second": 20.459, "step": 75410 }, { "epoch": 10.0, "eval_STS-B_loss": 10.956206321716309, "eval_STS-B_runtime": 0.5649, "eval_STS-B_samples_per_second": 2580.888, "eval_STS-B_steps_per_second": 21.242, "step": 75410 }, { "epoch": 11.0, "grad_norm": 0.15993598103523254, "learning_rate": 5.933027513923951e-06, "loss": 1.0267, "step": 82951 }, { "epoch": 11.0, "eval_AFQMC_loss": 0.01858402043581009, "eval_AFQMC_runtime": 99.2423, "eval_AFQMC_samples_per_second": 43.49, "eval_AFQMC_steps_per_second": 0.343, "eval_emb_eval_pearson_cosine": 0.635671675332461, "eval_emb_eval_spearman_cosine": 0.6462278753331322, "step": 82951 }, { "epoch": 11.0, "eval_ATEC_loss": 0.012706396169960499, "eval_ATEC_runtime": 6.5392, "eval_ATEC_samples_per_second": 3058.477, "eval_ATEC_steps_per_second": 24.009, "step": 82951 }, { "epoch": 11.0, "eval_BQ_loss": 0.014143843203783035, "eval_BQ_runtime": 3.3932, "eval_BQ_samples_per_second": 2947.098, "eval_BQ_steps_per_second": 23.282, "step": 82951 }, { "epoch": 11.0, "eval_Cmnli_loss": 0.014601893723011017, "eval_Cmnli_runtime": 3.7177, "eval_Cmnli_samples_per_second": 2236.627, "eval_Cmnli_steps_per_second": 17.484, "step": 82951 }, { "epoch": 11.0, "eval_LCQMC_loss": 0.01407212857156992, "eval_LCQMC_runtime": 2.3166, "eval_LCQMC_samples_per_second": 3799.506, "eval_LCQMC_steps_per_second": 29.785, "step": 82951 }, { "epoch": 11.0, "eval_Ocnli_loss": 0.015255914069712162, "eval_Ocnli_runtime": 0.6096, "eval_Ocnli_samples_per_second": 3029.719, "eval_Ocnli_steps_per_second": 24.605, "step": 82951 }, { "epoch": 11.0, "eval_PAWSX_loss": 0.04009222611784935, "eval_PAWSX_runtime": 0.9569, "eval_PAWSX_samples_per_second": 2090.135, "eval_PAWSX_steps_per_second": 16.721, "step": 82951 }, { "epoch": 11.0, "eval_QBQTC_loss": 14.71971607208252, "eval_QBQTC_runtime": 7.651, "eval_QBQTC_samples_per_second": 2614.025, "eval_QBQTC_steps_per_second": 20.52, "step": 82951 }, { "epoch": 11.0, "eval_STS-B_loss": 11.138467788696289, "eval_STS-B_runtime": 0.5605, "eval_STS-B_samples_per_second": 2601.08, "eval_STS-B_steps_per_second": 21.408, "step": 82951 }, { "epoch": 12.0, "grad_norm": 0.22925728559494019, "learning_rate": 4.451735214089958e-06, "loss": 0.9688, "step": 90492 }, { "epoch": 12.0, "eval_AFQMC_loss": 0.018353162333369255, "eval_AFQMC_runtime": 98.1243, "eval_AFQMC_samples_per_second": 43.985, "eval_AFQMC_steps_per_second": 0.346, "eval_emb_eval_pearson_cosine": 0.6340788346277473, "eval_emb_eval_spearman_cosine": 0.6444001384260496, "step": 90492 }, { "epoch": 12.0, "eval_ATEC_loss": 0.012524303048849106, "eval_ATEC_runtime": 6.5509, "eval_ATEC_samples_per_second": 3052.999, "eval_ATEC_steps_per_second": 23.966, "step": 90492 }, { "epoch": 12.0, "eval_BQ_loss": 0.014260655269026756, "eval_BQ_runtime": 3.4399, "eval_BQ_samples_per_second": 2907.038, "eval_BQ_steps_per_second": 22.966, "step": 90492 }, { "epoch": 12.0, "eval_Cmnli_loss": 0.014628582634031773, "eval_Cmnli_runtime": 3.7252, "eval_Cmnli_samples_per_second": 2232.102, "eval_Cmnli_steps_per_second": 17.449, "step": 90492 }, { "epoch": 12.0, "eval_LCQMC_loss": 0.013899387791752815, "eval_LCQMC_runtime": 2.349, "eval_LCQMC_samples_per_second": 3747.087, "eval_LCQMC_steps_per_second": 29.374, "step": 90492 }, { "epoch": 12.0, "eval_Ocnli_loss": 0.015396999195218086, "eval_Ocnli_runtime": 0.6119, "eval_Ocnli_samples_per_second": 3018.253, "eval_Ocnli_steps_per_second": 24.512, "step": 90492 }, { "epoch": 12.0, "eval_PAWSX_loss": 0.04036952182650566, "eval_PAWSX_runtime": 0.9649, "eval_PAWSX_samples_per_second": 2072.698, "eval_PAWSX_steps_per_second": 16.582, "step": 90492 }, { "epoch": 12.0, "eval_QBQTC_loss": 15.251015663146973, "eval_QBQTC_runtime": 7.662, "eval_QBQTC_samples_per_second": 2610.3, "eval_QBQTC_steps_per_second": 20.491, "step": 90492 }, { "epoch": 12.0, "eval_STS-B_loss": 11.509617805480957, "eval_STS-B_runtime": 0.5625, "eval_STS-B_samples_per_second": 2592.013, "eval_STS-B_steps_per_second": 21.333, "step": 90492 } ], "logging_steps": 500, "max_steps": 113115, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }