colphi3.5 / results.json
yydxlv's picture
Update results.json
a8200d6 verified
{
"syntheticDocQA_artificial_intelligence_test": {
"ndcg_at_1": 0.96,
"ndcg_at_3": 0.98524,
"ndcg_at_5": 0.98524,
"ndcg_at_10": 0.98524,
"ndcg_at_20": 0.98524,
"ndcg_at_50": 0.98524,
"ndcg_at_100": 0.98524,
"map_at_1": 0.96,
"map_at_3": 0.98,
"map_at_5": 0.98,
"map_at_10": 0.98,
"map_at_20": 0.98,
"map_at_50": 0.98,
"map_at_100": 0.98,
"recall_at_1": 0.96,
"recall_at_3": 1.0,
"recall_at_5": 1.0,
"recall_at_10": 1.0,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.96,
"precision_at_3": 0.33333,
"precision_at_5": 0.2,
"precision_at_10": 0.1,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.96,
"mrr_at_3": 0.98,
"mrr_at_5": 0.98,
"mrr_at_10": 0.98,
"mrr_at_20": 0.98,
"mrr_at_50": 0.98,
"mrr_at_100": 0.98,
"naucs_at_1_max": 0.06302521008403594,
"naucs_at_1_std": -0.04843604108309577,
"naucs_at_1_diff1": 0.9305555555555578,
"naucs_at_3_max": 1.0,
"naucs_at_3_std": 1.0,
"naucs_at_3_diff1": 1.0,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 1.0,
"naucs_at_5_diff1": 1.0,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"tabfquad_test_subsampled": {
"ndcg_at_1": 0.76429,
"ndcg_at_3": 0.81548,
"ndcg_at_5": 0.83054,
"ndcg_at_10": 0.84162,
"ndcg_at_20": 0.85069,
"ndcg_at_50": 0.85663,
"ndcg_at_100": 0.85846,
"map_at_1": 0.76429,
"map_at_3": 0.80238,
"map_at_5": 0.81095,
"map_at_10": 0.81526,
"map_at_20": 0.81777,
"map_at_50": 0.81884,
"map_at_100": 0.81903,
"recall_at_1": 0.76429,
"recall_at_3": 0.85357,
"recall_at_5": 0.88929,
"recall_at_10": 0.925,
"recall_at_20": 0.96071,
"recall_at_50": 0.98929,
"recall_at_100": 1.0,
"precision_at_1": 0.76429,
"precision_at_3": 0.28452,
"precision_at_5": 0.17786,
"precision_at_10": 0.0925,
"precision_at_20": 0.04804,
"precision_at_50": 0.01979,
"precision_at_100": 0.01,
"mrr_at_1": 0.7642857142857142,
"mrr_at_3": 0.8047619047619048,
"mrr_at_5": 0.812261904761905,
"mrr_at_10": 0.8157568027210885,
"mrr_at_20": 0.8182429277072134,
"mrr_at_50": 0.8196521812519686,
"mrr_at_100": 0.819844389714301,
"naucs_at_1_max": 0.3930648524086476,
"naucs_at_1_std": 0.13014375068583456,
"naucs_at_1_diff1": 0.8538351805113578,
"naucs_at_3_max": 0.3984224656102249,
"naucs_at_3_std": 0.1907296096552706,
"naucs_at_3_diff1": 0.7447435318714221,
"naucs_at_5_max": 0.3885787498856053,
"naucs_at_5_std": 0.16155699948140706,
"naucs_at_5_diff1": 0.795170983191481,
"naucs_at_10_max": 0.3904450669156529,
"naucs_at_10_std": 0.15479525143390607,
"naucs_at_10_diff1": 0.7791561068871982,
"naucs_at_20_max": 0.28736949325184535,
"naucs_at_20_std": -0.07932263814616634,
"naucs_at_20_diff1": 0.7797725150666338,
"naucs_at_50_max": 0.34267040149392364,
"naucs_at_50_std": -0.1545284780578993,
"naucs_at_50_diff1": 0.8078120136943696,
"naucs_at_100_max": 1.0,
"naucs_at_100_std": 1.0,
"naucs_at_100_diff1": 1.0
},
"shiftproject_test": {
"ndcg_at_1": 0.53,
"ndcg_at_3": 0.6444,
"ndcg_at_5": 0.68185,
"ndcg_at_10": 0.70901,
"ndcg_at_20": 0.71713,
"ndcg_at_50": 0.72325,
"ndcg_at_100": 0.7248,
"map_at_1": 0.53,
"map_at_3": 0.615,
"map_at_5": 0.636,
"map_at_10": 0.64798,
"map_at_20": 0.6505,
"map_at_50": 0.65154,
"map_at_100": 0.65165,
"recall_at_1": 0.53,
"recall_at_3": 0.73,
"recall_at_5": 0.82,
"recall_at_10": 0.9,
"recall_at_20": 0.93,
"recall_at_50": 0.96,
"recall_at_100": 0.97,
"precision_at_1": 0.53,
"precision_at_3": 0.24333,
"precision_at_5": 0.164,
"precision_at_10": 0.09,
"precision_at_20": 0.0465,
"precision_at_50": 0.0192,
"precision_at_100": 0.0097,
"mrr_at_1": 0.55,
"mrr_at_3": 0.6300000000000001,
"mrr_at_5": 0.655,
"mrr_at_10": 0.6632341269841271,
"mrr_at_20": 0.6648575036075036,
"mrr_at_50": 0.6659570406445406,
"mrr_at_100": 0.6660746877033642,
"naucs_at_1_max": -0.058021416101939505,
"naucs_at_1_std": -0.29796973154270173,
"naucs_at_1_diff1": 0.632737219755598,
"naucs_at_3_max": 0.0032239674457922728,
"naucs_at_3_std": -0.39310778666771473,
"naucs_at_3_diff1": 0.5492736244077923,
"naucs_at_5_max": 0.18798876528252045,
"naucs_at_5_std": -0.11273267981054919,
"naucs_at_5_diff1": 0.6309890957153872,
"naucs_at_10_max": 0.11223155929038518,
"naucs_at_10_std": -0.22661064425770017,
"naucs_at_10_diff1": 0.565966386554623,
"naucs_at_20_max": -0.13685474189675803,
"naucs_at_20_std": -0.5446178471388509,
"naucs_at_20_diff1": 0.6702681072429001,
"naucs_at_50_max": -0.27065826330532117,
"naucs_at_50_std": -0.5157563025210059,
"naucs_at_50_diff1": 0.6458916900093365,
"naucs_at_100_max": -0.30376595082477015,
"naucs_at_100_std": -0.6305633364456832,
"naucs_at_100_diff1": 0.5714285714285773
},
"syntheticDocQA_government_reports_test": {
"ndcg_at_1": 0.9,
"ndcg_at_3": 0.93524,
"ndcg_at_5": 0.95203,
"ndcg_at_10": 0.95203,
"ndcg_at_20": 0.95203,
"ndcg_at_50": 0.95203,
"ndcg_at_100": 0.95203,
"map_at_1": 0.9,
"map_at_3": 0.92667,
"map_at_5": 0.93617,
"map_at_10": 0.93617,
"map_at_20": 0.93617,
"map_at_50": 0.93617,
"map_at_100": 0.93617,
"recall_at_1": 0.9,
"recall_at_3": 0.96,
"recall_at_5": 1.0,
"recall_at_10": 1.0,
"recall_at_20": 1.0,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.9,
"precision_at_3": 0.32,
"precision_at_5": 0.2,
"precision_at_10": 0.1,
"precision_at_20": 0.05,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.9,
"mrr_at_3": 0.9333333333333332,
"mrr_at_5": 0.9403333333333334,
"mrr_at_10": 0.9403333333333334,
"mrr_at_20": 0.9403333333333334,
"mrr_at_50": 0.9403333333333334,
"mrr_at_100": 0.9403333333333334,
"naucs_at_1_max": 0.1526143790849667,
"naucs_at_1_std": -0.28823529411764676,
"naucs_at_1_diff1": 0.8867880485527548,
"naucs_at_3_max": 0.07119514472455671,
"naucs_at_3_std": -0.29236694677871167,
"naucs_at_3_diff1": 0.8190943043884199,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 1.0,
"naucs_at_5_diff1": 1.0,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 1.0,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 1.0,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"infovqa_test_subsampled": {
"ndcg_at_1": 0.76316,
"ndcg_at_3": 0.82921,
"ndcg_at_5": 0.84289,
"ndcg_at_10": 0.85188,
"ndcg_at_20": 0.85644,
"ndcg_at_50": 0.86119,
"ndcg_at_100": 0.86283,
"map_at_1": 0.76316,
"map_at_3": 0.8141,
"map_at_5": 0.8219,
"map_at_10": 0.8255,
"map_at_20": 0.82673,
"map_at_50": 0.82746,
"map_at_100": 0.8276,
"recall_at_1": 0.76316,
"recall_at_3": 0.87247,
"recall_at_5": 0.90486,
"recall_at_10": 0.9332,
"recall_at_20": 0.95142,
"recall_at_50": 0.97571,
"recall_at_100": 0.98583,
"precision_at_1": 0.76316,
"precision_at_3": 0.29082,
"precision_at_5": 0.18097,
"precision_at_10": 0.09332,
"precision_at_20": 0.04757,
"precision_at_50": 0.01951,
"precision_at_100": 0.00986,
"mrr_at_1": 0.7611336032388664,
"mrr_at_3": 0.813090418353576,
"mrr_at_5": 0.8216936572199727,
"mrr_at_10": 0.8248160465265728,
"mrr_at_20": 0.825886692856507,
"mrr_at_50": 0.8265042447979247,
"mrr_at_100": 0.8266668975545796,
"naucs_at_1_max": 0.5439433197090425,
"naucs_at_1_std": 0.24776250817754733,
"naucs_at_1_diff1": 0.8926114371628262,
"naucs_at_3_max": 0.5680546695594344,
"naucs_at_3_std": 0.2541748591933084,
"naucs_at_3_diff1": 0.853534819493221,
"naucs_at_5_max": 0.705693682065501,
"naucs_at_5_std": 0.4124626625681076,
"naucs_at_5_diff1": 0.8406271757500486,
"naucs_at_10_max": 0.7825666550711661,
"naucs_at_10_std": 0.5614849682029542,
"naucs_at_10_diff1": 0.842869767889761,
"naucs_at_20_max": 0.7543170269410486,
"naucs_at_20_std": 0.5443151217010491,
"naucs_at_20_diff1": 0.8462829819306663,
"naucs_at_50_max": 0.8236318160505198,
"naucs_at_50_std": 0.7478265745755217,
"naucs_at_50_diff1": 0.8852960979962946,
"naucs_at_100_max": 0.7986413595283582,
"naucs_at_100_std": 0.6710512069467076,
"naucs_at_100_diff1": 0.8616984811111583
},
"syntheticDocQA_healthcare_industry_test": {
"ndcg_at_1": 0.92,
"ndcg_at_3": 0.95024,
"ndcg_at_5": 0.95885,
"ndcg_at_10": 0.95885,
"ndcg_at_20": 0.95885,
"ndcg_at_50": 0.96077,
"ndcg_at_100": 0.96077,
"map_at_1": 0.92,
"map_at_3": 0.94333,
"map_at_5": 0.94833,
"map_at_10": 0.94833,
"map_at_20": 0.94833,
"map_at_50": 0.94861,
"map_at_100": 0.94861,
"recall_at_1": 0.92,
"recall_at_3": 0.97,
"recall_at_5": 0.99,
"recall_at_10": 0.99,
"recall_at_20": 0.99,
"recall_at_50": 1.0,
"recall_at_100": 1.0,
"precision_at_1": 0.92,
"precision_at_3": 0.32333,
"precision_at_5": 0.198,
"precision_at_10": 0.099,
"precision_at_20": 0.0495,
"precision_at_50": 0.02,
"precision_at_100": 0.01,
"mrr_at_1": 0.95,
"mrr_at_3": 0.9633333333333333,
"mrr_at_5": 0.9658333333333333,
"mrr_at_10": 0.9658333333333333,
"mrr_at_20": 0.9658333333333333,
"mrr_at_50": 0.96625,
"mrr_at_100": 0.96625,
"naucs_at_1_max": 0.6046918767507006,
"naucs_at_1_std": 0.07207049486461263,
"naucs_at_1_diff1": 0.9279295051353874,
"naucs_at_3_max": 0.460939931528168,
"naucs_at_3_std": 0.08839091192032704,
"naucs_at_3_diff1": 0.8513849984438244,
"naucs_at_5_max": 1.0,
"naucs_at_5_std": 1.0,
"naucs_at_5_diff1": 0.5541549953314738,
"naucs_at_10_max": 1.0,
"naucs_at_10_std": 1.0,
"naucs_at_10_diff1": 0.5541549953314738,
"naucs_at_20_max": 1.0,
"naucs_at_20_std": 1.0,
"naucs_at_20_diff1": 0.5541549953314738,
"naucs_at_50_max": null,
"naucs_at_50_std": null,
"naucs_at_50_diff1": null,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"syntheticDocQA_energy_test": {
"ndcg_at_1": 0.91,
"ndcg_at_3": 0.94786,
"ndcg_at_5": 0.94786,
"ndcg_at_10": 0.95142,
"ndcg_at_20": 0.95142,
"ndcg_at_50": 0.9536,
"ndcg_at_100": 0.95525,
"map_at_1": 0.91,
"map_at_3": 0.94,
"map_at_5": 0.94,
"map_at_10": 0.94167,
"map_at_20": 0.94167,
"map_at_50": 0.9421,
"map_at_100": 0.94226,
"recall_at_1": 0.91,
"recall_at_3": 0.97,
"recall_at_5": 0.97,
"recall_at_10": 0.98,
"recall_at_20": 0.98,
"recall_at_50": 0.99,
"recall_at_100": 1.0,
"precision_at_1": 0.91,
"precision_at_3": 0.32333,
"precision_at_5": 0.194,
"precision_at_10": 0.098,
"precision_at_20": 0.049,
"precision_at_50": 0.0198,
"precision_at_100": 0.01,
"mrr_at_1": 0.95,
"mrr_at_3": 0.96,
"mrr_at_5": 0.9625,
"mrr_at_10": 0.9625,
"mrr_at_20": 0.9631666666666666,
"mrr_at_50": 0.9631666666666666,
"mrr_at_100": 0.9633279569892474,
"naucs_at_1_max": 0.6794791990870418,
"naucs_at_1_std": -0.20370370370370447,
"naucs_at_1_diff1": 0.9854756717501807,
"naucs_at_3_max": 0.807812013694371,
"naucs_at_3_std": -0.45238095238094883,
"naucs_at_3_diff1": 0.9564270152505466,
"naucs_at_5_max": 0.807812013694364,
"naucs_at_5_std": -0.45238095238095277,
"naucs_at_5_diff1": 0.9564270152505424,
"naucs_at_10_max": 0.9346405228758136,
"naucs_at_10_std": 0.1914098972922579,
"naucs_at_10_diff1": 0.9346405228758136,
"naucs_at_20_max": 0.9346405228758136,
"naucs_at_20_std": 0.1914098972922579,
"naucs_at_20_diff1": 0.9346405228758136,
"naucs_at_50_max": 1.0,
"naucs_at_50_std": 0.554154995331464,
"naucs_at_50_diff1": 1.0,
"naucs_at_100_max": null,
"naucs_at_100_std": null,
"naucs_at_100_diff1": null
},
"arxivqa_test_subsampled": {
"ndcg_at_1": 0.822,
"ndcg_at_3": 0.86124,
"ndcg_at_5": 0.87217,
"ndcg_at_10": 0.87958,
"ndcg_at_20": 0.88759,
"ndcg_at_50": 0.8913,
"ndcg_at_100": 0.89258,
"map_at_1": 0.822,
"map_at_3": 0.85133,
"map_at_5": 0.85753,
"map_at_10": 0.86076,
"map_at_20": 0.86318,
"map_at_50": 0.86383,
"map_at_100": 0.86394,
"recall_at_1": 0.822,
"recall_at_3": 0.89,
"recall_at_5": 0.916,
"recall_at_10": 0.938,
"recall_at_20": 0.968,
"recall_at_50": 0.986,
"recall_at_100": 0.994,
"precision_at_1": 0.822,
"precision_at_3": 0.29667,
"precision_at_5": 0.1832,
"precision_at_10": 0.0938,
"precision_at_20": 0.0484,
"precision_at_50": 0.01972,
"precision_at_100": 0.00994,
"mrr_at_1": 0.81,
"mrr_at_3": 0.8446666666666667,
"mrr_at_5": 0.8526666666666665,
"mrr_at_10": 0.8558063492063492,
"mrr_at_20": 0.8576622827722674,
"mrr_at_50": 0.8582444004699996,
"mrr_at_100": 0.858351807213441,
"naucs_at_1_max": 0.6618424420480973,
"naucs_at_1_std": 0.1726204386872768,
"naucs_at_1_diff1": 0.8807854527648874,
"naucs_at_3_max": 0.6544884460097947,
"naucs_at_3_std": 0.2072674168885846,
"naucs_at_3_diff1": 0.8465338029378915,
"naucs_at_5_max": 0.6638877773331578,
"naucs_at_5_std": 0.23538304210572925,
"naucs_at_5_diff1": 0.8338224178560337,
"naucs_at_10_max": 0.6184000481913182,
"naucs_at_10_std": 0.1655371826149804,
"naucs_at_10_diff1": 0.8101713803801093,
"naucs_at_20_max": 0.7978232959850622,
"naucs_at_20_std": 0.47426470588234737,
"naucs_at_20_diff1": 0.8186566293183944,
"naucs_at_50_max": 0.9019607843137241,
"naucs_at_50_std": 0.7225556889422341,
"naucs_at_50_diff1": 0.8832866479925231,
"naucs_at_100_max": 1.0,
"naucs_at_100_std": 0.8078120136943184,
"naucs_at_100_diff1": 0.8202614379084721
},
"tatdqa_test": {
"ndcg_at_1": 0.60571,
"ndcg_at_3": 0.70518,
"ndcg_at_5": 0.73148,
"ndcg_at_10": 0.75212,
"ndcg_at_20": 0.76107,
"ndcg_at_50": 0.76775,
"ndcg_at_100": 0.7707,
"map_at_1": 0.60571,
"map_at_3": 0.68094,
"map_at_5": 0.69555,
"map_at_10": 0.70408,
"map_at_20": 0.70656,
"map_at_50": 0.7076,
"map_at_100": 0.70786,
"recall_at_1": 0.60571,
"recall_at_3": 0.77521,
"recall_at_5": 0.839,
"recall_at_10": 0.90279,
"recall_at_20": 0.93803,
"recall_at_50": 0.97205,
"recall_at_100": 0.99028,
"precision_at_1": 0.60571,
"precision_at_3": 0.2584,
"precision_at_5": 0.1678,
"precision_at_10": 0.09028,
"precision_at_20": 0.0469,
"precision_at_50": 0.01944,
"precision_at_100": 0.0099,
"mrr_at_1": 0.6057108140947752,
"mrr_at_3": 0.6830700688537875,
"mrr_at_5": 0.6966788173349543,
"mrr_at_10": 0.7058545005689602,
"mrr_at_20": 0.707882381086114,
"mrr_at_50": 0.7091241073972927,
"mrr_at_100": 0.7093644660054671,
"naucs_at_1_max": 0.3183541397363705,
"naucs_at_1_std": 0.09826700774797242,
"naucs_at_1_diff1": 0.7187936758795999,
"naucs_at_3_max": 0.37504605152871257,
"naucs_at_3_std": 0.14677854908146837,
"naucs_at_3_diff1": 0.6383523296619431,
"naucs_at_5_max": 0.42709986452798276,
"naucs_at_5_std": 0.22990979779837553,
"naucs_at_5_diff1": 0.6357454223598389,
"naucs_at_10_max": 0.48845166873972423,
"naucs_at_10_std": 0.33255307682895896,
"naucs_at_10_diff1": 0.6184766146789907,
"naucs_at_20_max": 0.45722581804053714,
"naucs_at_20_std": 0.3272040057998003,
"naucs_at_20_diff1": 0.6092689407956671,
"naucs_at_50_max": 0.4613805353039302,
"naucs_at_50_std": 0.35387081782577695,
"naucs_at_50_diff1": 0.5797820548654825,
"naucs_at_100_max": 0.5928666738326348,
"naucs_at_100_std": 0.40800919961891313,
"naucs_at_100_diff1": 0.5890653536441407
},
"docvqa_test_subsampled": {
"ndcg_at_1": 0.47672,
"ndcg_at_3": 0.55229,
"ndcg_at_5": 0.56851,
"ndcg_at_10": 0.58892,
"ndcg_at_20": 0.60374,
"ndcg_at_50": 0.6134,
"ndcg_at_100": 0.62175,
"map_at_1": 0.47672,
"map_at_3": 0.53326,
"map_at_5": 0.54213,
"map_at_10": 0.55075,
"map_at_20": 0.55494,
"map_at_50": 0.55648,
"map_at_100": 0.55724,
"recall_at_1": 0.47672,
"recall_at_3": 0.60754,
"recall_at_5": 0.64745,
"recall_at_10": 0.70953,
"recall_at_20": 0.76718,
"recall_at_50": 0.81596,
"recall_at_100": 0.86696,
"precision_at_1": 0.47672,
"precision_at_3": 0.20251,
"precision_at_5": 0.12949,
"precision_at_10": 0.07095,
"precision_at_20": 0.03836,
"precision_at_50": 0.01632,
"precision_at_100": 0.00867,
"mrr_at_1": 0.48337028824833705,
"mrr_at_3": 0.5354767184035477,
"mrr_at_5": 0.5487804878048779,
"mrr_at_10": 0.5557869637137927,
"mrr_at_20": 0.5593928559116855,
"mrr_at_50": 0.5615025624726138,
"mrr_at_100": 0.5621791399632005,
"naucs_at_1_max": 0.3785349172346823,
"naucs_at_1_std": 0.6557287667252649,
"naucs_at_1_diff1": 0.8526440408297176,
"naucs_at_3_max": 0.28374137300417,
"naucs_at_3_std": 0.7269399252140738,
"naucs_at_3_diff1": 0.7922662049837966,
"naucs_at_5_max": 0.23868786072943135,
"naucs_at_5_std": 0.7281901708135905,
"naucs_at_5_diff1": 0.7793870480543929,
"naucs_at_10_max": 0.1650957871644876,
"naucs_at_10_std": 0.7712043441704308,
"naucs_at_10_diff1": 0.7842842146130585,
"naucs_at_20_max": 0.0797800670615453,
"naucs_at_20_std": 0.8135232985740711,
"naucs_at_20_diff1": 0.7706236528786001,
"naucs_at_50_max": 0.07226615905116034,
"naucs_at_50_std": 0.8406489117674839,
"naucs_at_50_diff1": 0.7290414525233613,
"naucs_at_100_max": -0.023763600742901323,
"naucs_at_100_std": 0.8665455621668288,
"naucs_at_100_diff1": 0.6958809819906159
}
}