diff --git "a/measurement.json" "b/measurement.json" --- "a/measurement.json" +++ "b/measurement.json" @@ -1,143814 +1,93655 @@ { - "measurement": [ - { - "key": "model.layers.0.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.008654792793095112, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.008557724766433239, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.004226564429700375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.004198359325528145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.004178441129624844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0018531077075749636, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.010950956493616104, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.008487832732498646, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.004188043996691704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.004161342047154903, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.004504639655351639, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.004852192476391792, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.004155186004936695, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.0024398472160100937, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.001833838177844882, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.0026055648922920227, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.0018257277552038431, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.0016031862469390035, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.001824369071982801, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0016013655113056302, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0017753636930137873, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0018239897908642888, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0014374866150319576, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0016000105533748865, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.011725133284926414, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.011597291566431522, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.005551365669816732, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.005516603123396635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.005491877440363169, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.0018538747681304812, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.012603654526174068, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0115074897184968, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.005506256129592657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0054695033468306065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.005851877387613058, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.006082551088184118, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.005461962427943945, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.0028821539599448442, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.001825071987695992, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.003086615586653352, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0018120500026270747, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0013442238559946418, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0018097830470651388, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.001340318936854601, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.001760594081133604, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0018089368240907788, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0010982082458212972, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0013384470948949456, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.061051443219184875, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.049987368285655975, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.03905067592859268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.03245754912495613, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.02689855918288231, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.01871645078063011, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.05076146125793457, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03868396207690239, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.028522148728370667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02140124887228012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.021841559559106827, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.02625282108783722, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01858377642929554, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.013221362605690956, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.011622069403529167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.013075403869152069, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007289874833077192, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.006521893665194511, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006321266759186983, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.005321749020367861, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.006661645602434874, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0058466969057917595, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0037345984019339085, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0040542748756706715, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.011429383419454098, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.0075537352822721004, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.005588356871157885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.00519172940403223, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.00438977126032114, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0028164733666926622, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0069841137155890465, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0062270089983940125, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.005267558619379997, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0034512472338974476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.003530367510393262, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.003545948304235935, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.0031718723475933075, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.002437365474179387, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.002232785103842616, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.001930012833327055, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.0017384790116921067, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.0016892468556761742, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0016292271902784705, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0015344234416261315, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0012792731868103147, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0016009478131309152, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0010953868040814996, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0014540517004206777, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.05128643289208412, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.048581115901470184, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.04774592071771622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.04385702311992645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.022317523136734962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.021579399704933167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.024550963193178177, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.022774262353777885, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.02251606248319149, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.020706890150904655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.020092297345399857, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.012380228377878666, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.010913525708019733, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.01072776224464178, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.010684091597795486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.006229264196008444, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.005877051502466202, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.005864784121513367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.005599945783615112, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.005594721995294094, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.0035112539771944284, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.004028323572129011, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.0034548912663012743, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.0032368111424148083, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.06075644493103027, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.05756758153438568, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.05662928521633148, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.052027378231287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.02638067491352558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.025508204475045204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.028978509828448296, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.026905177161097527, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.026613760739564896, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.02445811778306961, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.023673778399825096, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.014495043084025383, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.012658540159463882, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.012442054226994514, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.012388799339532852, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.007224613800644875, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.006430565379559994, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.006415226496756077, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.00607236847281456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.0060630543157458305, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.0038206824101507664, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.003929955419152975, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.003749430412426591, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.0027062478475272655, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.04208644479513168, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.036674607545137405, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.03409585356712341, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.03109377808868885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.01803322322666645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.015967652201652527, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.02276032045483589, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.020679010078310966, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.018807871267199516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.01587408035993576, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.01550830714404583, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.01157107949256897, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.010097650811076164, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.008952864445745945, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.008671791292726994, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.0060324519872665405, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.005277224816381931, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.005161919165402651, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.004930675961077213, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.0047775739803910255, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.0036787940189242363, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.004109954461455345, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.00336170825175941, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.003482098923996091, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.012282980605959892, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.009164652787148952, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.0050956690683960915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0050143166445195675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.004812548402696848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0021161416079849005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.010087639093399048, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.008696330711245537, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.005712249781936407, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0042411042377352715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.004648913163691759, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.005116097163408995, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.004165153484791517, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.002471762243658304, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.0018706568516790867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.002586265094578266, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.0015074610710144043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.001280350610613823, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0014203190803527832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0011145139578729868, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0014238074654713273, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0014095776714384556, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0008574533858336508, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.001059690141119063, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.012700902298092842, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.009361302480101585, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.0052754939533770084, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.005154168698936701, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.004912692122161388, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.0021518010180443525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.009833982214331627, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.008822842501103878, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.005794775206595659, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.004299592226743698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.0046319239772856236, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.004964978434145451, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.00421343557536602, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.002485246630385518, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.0018661234062165022, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.0025008474476635456, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0014619063585996628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0012130672112107277, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0013632263289764524, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0010261833667755127, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0013364129699766636, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.001350013422779739, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0007499310886487365, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0009591103880666196, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.08449047058820724, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.0645715743303299, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05460280552506447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.046335089951753616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03592061623930931, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.026595057919621468, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.0501960851252079, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.04526831582188606, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03931590914726257, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02708759903907776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.026514163240790367, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.025662148371338844, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.02177882194519043, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.0174512080848217, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.016293104737997055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.012880455702543259, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.009300865232944489, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.008863930590450764, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0076351952739059925, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006877892650663853, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.006760709919035435, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.006711497902870178, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.005170260090380907, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.004572182893753052, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.0833592414855957, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06286449730396271, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.05462943762540817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.042647771537303925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03622272610664368, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.02801411971449852, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04668210819363594, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.042208231985569, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03862762823700905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.024903062731027603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.02355322800576687, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02399100363254547, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.020830731838941574, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.018218517303466797, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.017564760521054268, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.012396031059324741, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.010897056199610233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.010695823468267918, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.009177614003419876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.008832301944494247, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.007421258836984634, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.008655917830765247, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.006671314127743244, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.007403053808957338, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.06741327792406082, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.06436379998922348, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.06349366158246994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.058573901653289795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.029279302805662155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.02846153825521469, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.03201374411582947, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.029677418991923332, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.029482202604413033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.027308516204357147, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.02657230757176876, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.016158059239387512, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.014170799404382706, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.014002731069922447, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.01396260131150484, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.008110497146844864, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.007589828222990036, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.007579632103443146, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.007247381843626499, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.007245340850204229, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.004536864813417196, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.005102121736854315, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.0044858818873763084, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.004032178781926632, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.08834812790155411, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.0844837874174118, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.08340636640787125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.07715579867362976, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.03843995928764343, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.037405699491500854, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.04208194836974144, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.038950271904468536, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.03871024772524834, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.03590879589319229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.03494977951049805, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.021072598174214363, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.018329009413719177, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.018112728372216225, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.018060393631458282, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.010501570999622345, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.009321369230747223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.009309959597885609, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.008845067583024502, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.008844390511512756, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.005576325114816427, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.005624221637845039, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.005506786052137613, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.0038168986793607473, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.09295331686735153, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.08531191945075989, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.08143233507871628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.0746840089559555, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.04148581624031067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.03846373409032822, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.05009697005152702, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.04559525102376938, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.042375918477773666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.03750928118824959, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.03647848963737488, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.025225380435585976, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.021936960518360138, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.02027313970029354, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.019820380955934525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.013118986040353775, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.011283950880169868, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.011148998513817787, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.010589378885924816, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.010408375412225723, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.007833090610802174, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.008122753351926804, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.007428587879985571, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.006618574261665344, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.014397388324141502, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.012342123314738274, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.011057095602154732, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.009531069546937943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.006395671982318163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.005323838908225298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.009197913110256195, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.007778882049024105, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.006662531290203333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.005261114798486233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.005056752823293209, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.004714189097285271, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.0037367180921137333, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.0031124174129217863, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.0029493416659533978, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.002377755008637905, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.0016970927827060223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.001622107345610857, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0014902701368555427, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0013951176078990102, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0012801086995750666, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0012360586551949382, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0009948621736839414, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0009054249967448413, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.01391732320189476, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.011790528893470764, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.01056318636983633, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.009056225419044495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.006076115649193525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.005026737228035927, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.007986844517290592, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.007395213004201651, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.006343797314912081, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.004959685727953911, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.004726929124444723, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.004033805336803198, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.003523522289469838, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.002932023722678423, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.0027748863212764263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.0020342841744422913, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.00155764096416533, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0014890608144924045, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0013472698628902435, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0012566184159368277, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.001090524485334754, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0010958589846268296, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0008998962584882975, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0007563851540908217, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.11156115680932999, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.10018257796764374, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.09638288617134094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.08460400998592377, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.05054451897740364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.04633267596364021, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.057905763387680054, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.05333058536052704, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.051754746586084366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.043059587478637695, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.04027775675058365, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.02933833934366703, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.025386426597833633, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.02411784790456295, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.023807162418961525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.014641232788562775, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.012256919406354427, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.012151927687227726, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.010729092173278332, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.010603721253573895, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.00750392209738493, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.007309847045689821, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.007020001299679279, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.004483103286474943, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.14682261645793915, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1270771324634552, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.12050803005695343, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.1042342334985733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06656148284673691, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05876719206571579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0773179680109024, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07104466110467911, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.06849141418933868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05448263883590698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05043591558933258, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.03989829495549202, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.034739091992378235, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03274212405085564, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03226310759782791, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.020472057163715363, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.018397659063339233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.018247058615088463, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.016287416219711304, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01608894020318985, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.012083328329026699, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013276188634335995, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.011517971754074097, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010876836255192757, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.11418652534484863, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.10884952545166016, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.10733973979949951, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.0991419106721878, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.05073551833629608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.04917586222290993, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.05559168756008148, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.0514826774597168, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.05114201083779335, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.04713032394647598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.04584181681275368, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.027996934950351715, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.024379722774028778, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.024055950343608856, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.023981362581253052, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.01396593451499939, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01250656507909298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.012486902996897697, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01182614080607891, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.011824592016637325, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.007469908334314823, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.007731601595878601, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.007368107791990042, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.005456050392240286, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1357642114162445, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.12951871752738953, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1277553141117096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1181090697646141, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.060405660420656204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.05856557562947273, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.06622367352247238, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.06129943206906319, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.06088331714272499, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.05615013465285301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.054593607783317566, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.03323392942547798, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.028909005224704742, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.02852696180343628, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.028439335525035858, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.016542809084057808, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01451411098241806, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01449010893702507, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.013677672483026981, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.013676952570676804, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.008623186498880386, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.008496103808283806, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.008498281240463257, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.005416026338934898, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.14161460101604462, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.13242341578006744, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.12893110513687134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1185993105173111, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.06375611573457718, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.06067071482539177, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.07293815910816193, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.06669485569000244, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.06476372480392456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.05836380273103714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.056549157947301865, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.03673894703388214, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.03174591064453125, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.030480332672595978, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.030178595334291458, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.018451634794473648, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.01599782519042492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.015860505402088165, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.014971100725233555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.014786769635975361, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.0101231774315238, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.010217149741947651, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.009729334153234959, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.007314620539546013, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.018774451687932014, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.016698377206921577, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.015495242550969124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.013576192781329155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.008531907573342323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0074891806580126286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.010840616188943386, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.009837614372372627, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.008780368603765965, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.007241174578666687, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.006897824816405773, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.005525413900613785, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.004717376083135605, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.0041292388923466206, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.003980437759310007, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.002786535769701004, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.002205122960731387, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.002136267488822341, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0019595911726355553, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0018772452604025602, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0015096638817340136, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.001519288052804768, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0013043441576883197, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0010859743924811482, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.019435157999396324, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.01720636524260044, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.015750158578157425, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.013747618533670902, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.008702271617949009, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.007522504311054945, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.011130506172776222, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.01030314713716507, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.008967375382781029, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.007363586686551571, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.006997488439083099, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.005625796038657427, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.004906942136585712, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.004193470813333988, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.004010559991002083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.0028254548087716103, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.002209380967542529, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0021229840349406004, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0019518984481692314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0018466663314029574, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.001503725303336978, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0015122788026928902, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0012796086957678199, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0010307437041774392, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.125090554356575, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1138470321893692, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.11017438024282455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.09771133214235306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0574030876159668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.05321463197469711, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.06558291614055634, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.06011973321437836, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.058598075062036514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.049744486808776855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.04673396423459053, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.033240169286727905, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.028643706813454628, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.027401374652981758, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.02709547057747841, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.01658327877521515, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.013881350867450237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.013780458830296993, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.012298114597797394, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.012179356068372726, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.008484067395329475, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.008168462663888931, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.007962334901094437, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.004907145164906979, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.14322999119758606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.12044529616832733, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11245547235012054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.09313582628965378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06485109031200409, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.055660489946603775, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.07724893093109131, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07051245868206024, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.06726387143135071, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05055789276957512, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.04581013321876526, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.03983352333307266, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03472374379634857, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.032227788120508194, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03161201998591423, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.020445002242922783, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.018539544194936752, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.018352298066020012, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01615121215581894, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01589115709066391, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.01210141833871603, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013874722644686699, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.011381071992218494, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.011644924059510231, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1429670751094818, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.13591162860393524, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.13387763500213623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.12326152622699738, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.06534037739038467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.06308858096599579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.07195503264665604, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.06645630300045013, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.0659051164984703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06025918945670128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.058408115059137344, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.03677232563495636, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.032204706221818924, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03172089159488678, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.031601790338754654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.018522128462791443, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.017457813024520874, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01742069609463215, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.016558783128857613, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.016545193269848824, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01053669024258852, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012045173905789852, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010388548485934734, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009746570140123367, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1679552048444748, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.15965820848941803, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1572502851486206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14481300115585327, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07641549408435822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07376393675804138, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08419739454984665, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07776578515768051, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07710736989974976, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07042000442743301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06820861995220184, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.042613863945007324, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03702450171113014, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03644245117902756, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03629970923066139, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.021291667595505714, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.018993277102708817, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.018952341750264168, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01784064620733261, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.017828036099672318, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011422639712691307, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01180120836943388, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011229632422327995, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008365019224584103, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.028936566784977913, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.02649260312318802, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.019598428159952164, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.01780184730887413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.01317665446549654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.009745699353516102, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.02350390888750553, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.021376214921474457, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.013496498577296734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.012038446962833405, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.011798367835581303, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.011188625358045101, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.010173491202294827, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.0069169653579592705, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.00635337783023715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.006072165444493294, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.005011535715311766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.004670385271310806, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.0048704869113862514, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.004508432000875473, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.004829712212085724, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.004642480053007603, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.004311723634600639, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.00413142004981637, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.02530379593372345, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.022834252566099167, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.02158098854124546, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.019142769277095795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.011544503271579742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.010406347922980785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.01392066478729248, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.012834455817937851, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.011833678930997849, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.009959114715456963, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.009456300176680088, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.007068193983286619, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.006140179466456175, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.005566605366766453, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.0054251267574727535, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.003548592561855912, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.002938985824584961, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.0028764652088284492, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.002632282441481948, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0025574378669261932, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0018953799735754728, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0019501978531479836, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0017092424677684903, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0013737231492996216, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.026943448930978775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.024155091494321823, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.022562067955732346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.01992820017039776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.012089598923921585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.01073551271110773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.01491641066968441, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.013832464814186096, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.012413262389600277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.010391401126980782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.009888408705592155, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.00752989761531353, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.006589164026081562, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.005816074088215828, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.005617524962872267, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.0037749777548015118, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0030437111854553223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0029506392311304808, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.002711242763325572, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.002600887091830373, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0020053740590810776, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0020197827834635973, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0017576627433300018, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0013624951243400574, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.13251318037509918, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.12201550602912903, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.11862294375896454, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.10620247572660446, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.06086008995771408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.05699024349451065, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.06896425038576126, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0632552057504654, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.06196189299225807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.053444262593984604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.05052616819739342, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.03489966690540314, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.0301203653216362, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.029031498357653618, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.028763670474290848, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.01741635426878929, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.014696924015879631, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.01461285725235939, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.013172956183552742, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0130862295627594, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.008906335569918156, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.008598512969911098, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0084454994648695, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.005192602053284645, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.14183753728866577, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.12473296374082565, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11677718907594681, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0982714295387268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06448110938072205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05703536048531532, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.07977524399757385, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07248974591493607, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.06654836237430573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.053049225360155106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.049580760300159454, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.041031669825315475, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.035339996218681335, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03176579996943474, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.030884871259331703, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.020830625668168068, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01790868304669857, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01755244843661785, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0159187950193882, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.015487195923924446, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.011976674199104309, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013201888650655746, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.010887347161769867, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010663852095603943, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.13360030949115753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.12656298279762268, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.12447166442871094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.11414221674203873, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.061081867665052414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.058759380131959915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.06731405854225159, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.06225794181227684, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.06168045848608017, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.055895060300827026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.05394510179758072, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.034217577427625656, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.029772119596600533, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.029270363971590996, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.029147112742066383, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.017119718715548515, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.015451792627573013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.015421670861542225, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.014464148320257664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.014452429488301277, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.009310073219239712, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.009876835159957409, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.00914943777024746, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007291615940630436, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.16747865080833435, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1587245613336563, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.15609943866729736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14321982860565186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07663270831108093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07370387762784958, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08440813422203064, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07808614522218704, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07738333940505981, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07011988759040833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06765061616897583, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.042749762535095215, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.037188950926065445, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.036548249423503876, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.036399178206920624, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.021344922482967377, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01899608224630356, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.018954727798700333, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.017726842314004898, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.017710909247398376, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011360165663063526, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011727951467037201, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011154304258525372, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008209608495235443, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.06051762402057648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.05502040684223175, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.05173110216856003, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.04646595939993858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.027496397495269775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.02516625076532364, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.034572504460811615, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.030852336436510086, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.028118612244725227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.024153081700205803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.023084590211510658, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.016856715083122253, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.014578462578356266, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.013360904529690742, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.013068157248198986, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.008586317300796509, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.007264408282935619, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.007082364521920681, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.006648385897278786, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.006422208156436682, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.0049494048580527306, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.005033631809055805, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.0045721749775111675, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.003775108838453889, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.03714489936828613, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.03324234113097191, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.031567417085170746, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.027874242514371872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.016845908015966415, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.01520972978323698, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.019919682294130325, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0184260755777359, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.017312757670879364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.014405610971152782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.013600675389170647, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.01008265558630228, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.008789578452706337, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.00808475911617279, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.007913694716989994, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.005047954618930817, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.0042122225277125835, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.004142173565924168, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0037313473876565695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.003643045201897621, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0026568241883069277, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.002704760292544961, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0024254503659904003, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0018354253843426704, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.03924329951405525, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.03486032038927078, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.032903075218200684, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.028967633843421936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.017530711367726326, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.015701791271567345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.0210505910217762, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.019434744492173195, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.01805976964533329, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.014922358095645905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.014130166731774807, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.010622412897646427, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.009231960400938988, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.008398137986660004, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.008191410452127457, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.005307463929057121, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0043368833139538765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.00425071083009243, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0038191713392734528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0037080058827996254, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.002771583618596196, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.002759021008387208, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.002491932362318039, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0017967532621696591, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.13775238394737244, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.12687523663043976, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.12330581992864609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.11044161766767502, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.06350298970937729, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.05942567065358162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.07157126069068909, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0659903883934021, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.06466943770647049, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.055751822888851166, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.0527384914457798, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.03622987121343613, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.03145004063844681, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.03031907044351101, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.030042454600334167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.01807938516139984, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.015378415584564209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.015294804237782955, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.013786539435386658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.013688674196600914, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.009269682690501213, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.00902937725186348, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.008850907906889915, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.005509234964847565, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1378517895936966, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.11684518307447433, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.10905765742063522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.08879255503416061, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0632229894399643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05477239191532135, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.07524295151233673, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.06895960867404938, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.06547150015830994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.04876885190606117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.043685704469680786, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.038893673568964005, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.033481746912002563, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03087124601006508, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.030227188020944595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01960163563489914, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01682814210653305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.016611138358712196, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.014186675660312176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.013881921768188477, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.010962847620248795, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.011729271151125431, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.010146567597985268, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.009057479910552502, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.15002337098121643, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.14175133407115936, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.13936850428581238, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1273939162492752, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.06896448880434036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0661499947309494, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.07606584578752518, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07034885138273239, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.06970593333244324, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06273875385522842, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06032738462090492, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.03857040777802467, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03352034464478493, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03290432319045067, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.032756224274635315, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.019272668287158012, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.017021989449858665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.016980847343802452, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.015788493677973747, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01577531173825264, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.010244389064610004, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.010389658622443676, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010045220144093037, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007117740344256163, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18607628345489502, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1759124994277954, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1729234755039215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1581527292728424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08554969727993011, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08208213746547699, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.0942745953798294, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.0872412845492363, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08645482361316681, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07784243673086166, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07476552575826645, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.0476788729429245, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.0414627343416214, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04070410504937172, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.040525857359170914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02376575395464897, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020734623074531555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020685123279690742, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01916719228029251, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01915348693728447, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012355124577879906, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012173119001090527, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012104198336601257, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007731206249445677, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.18476006388664246, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.1681203544139862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1619369238615036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.14514459669589996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.08388908207416534, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.07806806266307831, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.09668295085430145, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.08881857991218567, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.08571713417768478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07370071113109589, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.06990733742713928, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.048993244767189026, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04238998517394066, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.040228985249996185, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.03969910368323326, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02456725761294365, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02112552709877491, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.020897263661026955, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.01917351223528385, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.01884814165532589, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.013341689482331276, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.013568440452218056, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.012660794891417027, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.009660833515226841, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.04198164865374565, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.038014840334653854, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.036232564598321915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.03223995119333267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.019175544381141663, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.017459336668252945, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0225992389023304, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.020899975672364235, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.019639762118458748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.016590848565101624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.015750056132674217, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.011448346078395844, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.00997922196984291, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.009213333018124104, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.009025730192661285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.005729991011321545, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.004812080413103104, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.004732950124889612, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.004303235560655594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.004208039958029985, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.00302108027972281, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0030994026456028223, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0027678608894348145, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0021268760319799185, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.04090297222137451, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.03673969581723213, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.03465240076184273, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.030719568952918053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.01841912791132927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.01654379814863205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.022338705137372017, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.020585648715496063, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.018893705680966377, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.015886543318629265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.015158873982727528, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01127338595688343, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.009800242260098457, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.008833560161292553, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.008590137585997581, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.005647732876241207, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.004573093727231026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.004464785568416119, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.004068467300385237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.003938133828341961, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.002959473757073283, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0029409376438707113, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0026435370091348886, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.001929144375026226, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.14950570464134216, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.13823172450065613, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.13455544412136078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.12083454430103302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.06896454840898514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.0647517666220665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.07763087749481201, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.07165764272212982, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.07014334201812744, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.060843612998723984, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.057709865272045135, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.03928862139582634, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.03410429507493973, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.032906338572502136, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.03261227160692215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.019599031656980515, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.016645871102809906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.016555827111005783, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.01495778001844883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.014873017556965351, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.01002119667828083, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.009702359326183796, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.009579170495271683, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.005823094397783279, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.16375285387039185, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1379496455192566, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1286417841911316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.10956845432519913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07398118078708649, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.06356734782457352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08843520283699036, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08119815587997437, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07723114639520645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05874016135931015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05404217168688774, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04563438147306442, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03951258212327957, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.036283642053604126, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03549107164144516, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.023173250257968903, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.02007909305393696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.019836459308862686, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01735224761068821, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.016951845958828926, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013243813998997211, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014325130730867386, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012295250780880451, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.011349411681294441, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.15503017604351044, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1465015709400177, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.14396923780441284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.13163436949253082, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07157379388809204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.06861862540245056, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.07888798415660858, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.0730317160487175, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07233233004808426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06506503373384476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06253743171691895, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04002173990011215, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03480474650859833, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03415336459875107, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03399648517370224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.01997428759932518, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.017624152824282646, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01758422888815403, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.016329782083630562, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.016312217339873314, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01053555216640234, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.010695507749915123, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010323390364646912, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007249332498759031, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.19228427112102509, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1817205250263214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1786230057477951, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.16332915425300598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08875131607055664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.085092693567276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09771460294723511, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09055618196725845, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08971673995256424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08069301396608353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07748429477214813, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04942460358142853, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04304123669862747, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.042237669229507446, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.042042966932058334, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.024630382657051086, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.021462025120854378, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.021410468965768814, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019815562292933464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019798042252659798, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012715755961835384, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012521988712251186, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012448016554117203, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007831629365682602, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.19206863641738892, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.17468957602977753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1681285947561264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15037837624549866, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.0873931348323822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08119366317987442, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10072334110736847, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09265081584453583, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.08930012583732605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.0766233429312706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07247544825077057, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.051092300564050674, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04422138258814812, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04190121218562126, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.041347436606884, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.025596115738153458, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.021971965208649635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.021722713485360146, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.019901324063539505, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.019548138603568077, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.01383089181035757, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.014083635993301868, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013090668246150017, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.009968887083232403, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.04101298749446869, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.0377851240336895, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.03631281107664108, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.032571375370025635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.018945662304759026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.017527421936392784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.022054271772503853, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0203771460801363, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.019291354343295097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.016669398173689842, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.015868432819843292, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.011185212060809135, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.009749024175107479, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.009107734076678753, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.008951259776949883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.005599943920969963, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.00476209819316864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.004695428069680929, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.004312369506806135, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.004239662550389767, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0029624050948768854, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0030537794809788465, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0027536526322364807, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.00211968831717968, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.036908190697431564, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.03393850848078728, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.03219417855143547, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.028833895921707153, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.016874993219971657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.015399460680782795, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.02033993974328041, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.018781699240207672, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.01718832552433014, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.014848442748188972, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.014231130480766296, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.010263198986649513, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.00894121639430523, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.008092586882412434, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.007886632345616817, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.005140659399330616, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.004191307816654444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0040916744619607925, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0037844309117645025, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0036748501006513834, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0027030478231608868, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0026804429944604635, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.002437631832435727, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0017614207463338971, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.1623753011226654, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.15112437307834625, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.14746515452861786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.13298577070236206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.07514411211013794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.07095217704772949, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.08456936478614807, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.07758443057537079, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.07629416882991791, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.06674499064683914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.06355947256088257, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.04284294694662094, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.03696107864379883, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.035833362489938736, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.03557151183485985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02136668935418129, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.018119318410754204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.018031487241387367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.016376610845327377, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.016302719712257385, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.010925471782684326, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.010501747950911522, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.010441150516271591, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0062757316045463085, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.14804460108280182, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1250324845314026, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11428499966859818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.09440036863088608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06701850891113281, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05665186047554016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08372001349925995, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07699531316757202, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.06966163218021393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05251706391572952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.04823033884167671, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04306924715638161, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03726116195321083, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03275763615965843, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.031617049127817154, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.02168605849146843, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.017946092411875725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.017484677955508232, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.015328829176723957, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0147189786657691, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.012079090811312199, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.012868589721620083, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.010715562850236893, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.009830646216869354, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.15451836585998535, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.14606338739395142, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1435372531414032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.13144038617610931, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07154998183250427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0686190128326416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.07897163182497025, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07303359359502792, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07233741879463196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06510939449071884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0627332404255867, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04015817120671272, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.034883417189121246, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.034227967262268066, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03407447040081024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.020066604018211365, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.017763569951057434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01772535778582096, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.016490062698721886, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.016471508890390396, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.010701839812099934, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.010936494916677475, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01049091387540102, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007594875525683165, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.19676096737384796, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.18604198098182678, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.18288731575012207, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.167497456073761, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.0910966694355011, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08733806014060974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10037872940301895, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09295544773340225, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09207972884178162, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08287408202886581, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0796721875667572, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05085937678813934, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04422011598944664, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04338531941175461, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.0431906096637249, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.025356529280543327, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.022073466330766678, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02201925776898861, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.020393533632159233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02037252113223076, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.0131497448310256, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012913407757878304, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012874388135969639, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008120378479361534, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.19394993782043457, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.176006481051445, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.16928593814373016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1513151377439499, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.08825576305389404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08181759715080261, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10194899141788483, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09362255781888962, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09025023132562637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07715174555778503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.0729939267039299, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.051746197044849396, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.044731345027685165, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04236248508095741, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04179444536566734, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.025988852605223656, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.022289037704467773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.022033989429473877, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.020157955586910248, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.019800638779997826, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.014198845252394676, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.014378026127815247, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013460787013173103, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010275260545313358, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.053939249366521835, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.04990627244114876, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.04827440530061722, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.043488409370183945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.024922769516706467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.02325923927128315, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.02847515605390072, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.026349565014243126, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.025339575484395027, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02203885093331337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.020988931879401207, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.014450312592089176, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.01260439958423376, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.011963672004640102, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.011813545599579811, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.007231134921312332, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.006240687798708677, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.006177322473376989, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.005672097206115723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.005605967715382576, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.003814451862126589, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.003946448210626841, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.003602205775678158, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0027357880026102066, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.04845714196562767, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.044708576053380966, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.0428968146443367, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.038502126932144165, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.022121688351035118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.020458223298192024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.025857044383883476, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0239093154668808, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.022515447810292244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.019539114087820053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.018687238916754723, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.013062911108136177, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.011400632560253143, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.010594218038022518, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.010398933663964272, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.006537733133882284, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.005465512629598379, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.005376656074076891, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.004949002061039209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.004856181796640158, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0034213573671877384, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0034241918474435806, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.003168297465890646, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002259207656607032, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.172231525182724, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.16082854568958282, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.15722087025642395, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1423153579235077, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.07976232469081879, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.07554925978183746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.0892660841345787, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.08221463114023209, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.08089848607778549, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.07122593373060226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.0679192766547203, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.04521665349602699, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.039150066673755646, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.03803519532084465, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.037765663117170334, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02253442257642746, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.019217297434806824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.019128311425447464, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.01744885928928852, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.017382891848683357, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.011528116650879383, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.011120853945612907, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.011080566793680191, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.006647520232945681, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.18153484165668488, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.15859895944595337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1508162021636963, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.12739580869674683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08221026510000229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07344576716423035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09610968828201294, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08827273547649384, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08516313135623932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06723850220441818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.061358753591775894, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04932815581560135, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04258042946457863, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03988007828593254, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.039228588342666626, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.02491185814142227, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.021440263837575912, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.021252557635307312, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.018632492050528526, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01836668699979782, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013950920663774014, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014485590159893036, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.013134012930095196, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010930871590971947, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.16062003374099731, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1517033874988556, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.14908340573310852, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1363711804151535, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07443127781152725, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07130154222249985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08212561905384064, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07599349319934845, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07525664567947388, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06760227680206299, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06499087065458298, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04165663197636604, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.0362032987177372, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.0355047844350338, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03533603623509407, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.020795457065105438, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.0182016734033823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01815543696284294, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.016819573938846588, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.016797557473182678, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.010914620943367481, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.010862955823540688, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010683898814022541, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007124627940356731, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.20486190915107727, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.19356881082057953, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.19026018679141998, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.17411170899868011, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09503743052482605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0910506397485733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10471820831298828, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09699873626232147, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09609915316104889, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0863143652677536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08288033306598663, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05304862931370735, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04615936055779457, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.0452786423265934, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04506700113415718, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.026448145508766174, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.023010525852441788, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.022956518456339836, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.021226003766059875, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.021200260147452354, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.013682752847671509, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.013432524167001247, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.013387174345552921, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00839343760162592, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2008305788040161, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18185696005821228, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17481876909732819, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15604440867900848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09131879359483719, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08456666767597198, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.1052035242319107, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.0968819111585617, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09343138337135315, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.0796038955450058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07517042011022568, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05338117107748985, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04623688384890556, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04378263279795647, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.043195754289627075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.026750806719064713, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.022939622402191162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.022681409493088722, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.020684590563178062, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02031523920595646, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.014455384574830532, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.014682527631521225, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013685734011232853, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010369377210736275, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.05020010471343994, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.04670369252562523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.0452050045132637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.040854863822460175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.023270215839147568, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.02180299162864685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.026623696088790894, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.024645525962114334, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.02362753637135029, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.020716741681098938, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.019770648330450058, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.013512755744159222, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.011776896193623543, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01116630807518959, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.011017615906894207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.0067580146715044975, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.005799374543130398, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.0057363654486835, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0052904109470546246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.005225461442023516, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.003553129732608795, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.003638859372586012, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.003349245758727193, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0024795481003820896, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.04580118507146835, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.04250751808285713, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.04082762822508812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.03678496181964874, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.021015174686908722, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.019502196460962296, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.024489039555191994, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.02269689552485943, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.02134552411735058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.018668491393327713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.017865225672721863, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.012382431887090206, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.010838302783668041, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.010060724802315235, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.009877797216176987, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.006199446506798267, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.005204086657613516, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0051170228980481625, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.004736929200589657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.004647545982152224, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.003257699543610215, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0032700705341994762, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0030197976157069206, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0021665089298039675, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.18060295283794403, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.16888132691383362, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.16533268988132477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1497349739074707, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.08369280397891998, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.07947102189064026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.09366044402122498, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.08597490191459656, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.08481575548648834, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0748978853225708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07142306864261627, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.04743444547057152, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04095569625496864, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.03992360830307007, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.03968057036399841, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02367478609085083, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.020200839266180992, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02012566849589348, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.018373258411884308, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.018313700333237648, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.012112396769225597, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.011690753512084484, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.011655347421765327, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0070548346266150475, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1720973402261734, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1472283899784088, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.13865970075130463, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.11436057090759277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07778476923704147, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.06802456825971603, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09167515486478806, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08436097949743271, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08092445135116577, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.061639346182346344, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.055335063487291336, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04704461991786957, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.040697429329156876, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03771703317761421, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.036992888897657394, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.023628931492567062, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.020243963226675987, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.020033497363328934, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.017215590924024582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.016883626580238342, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013006387278437614, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01370210014283657, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012099934741854668, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010244875214993954, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1582939773797989, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.14950507879257202, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.14691314101219177, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1345045417547226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07358773052692413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0704760029911995, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08129271864891052, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07513493299484253, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.0743955597281456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06682369112968445, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0643056109547615, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04125899821519852, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.0358559787273407, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03516100347042084, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.034992486238479614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.020636441186070442, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01814141497015953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.018092859536409378, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.016788972541689873, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01676809787750244, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01093043852597475, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011003667488694191, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010699608363211155, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007438418455421925, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.2035594880580902, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.19232363998889923, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.188989520072937, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.17303252220153809, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09465833008289337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09067884087562561, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10437683761119843, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09663915634155273, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09570381045341492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08597110956907272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08263477683067322, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05292992666363716, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04603392258286476, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04514385759830475, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.044935669749975204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.026399217545986176, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02300216257572174, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.022942807525396347, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.021226897835731506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02119777351617813, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01370936818420887, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.013504686765372753, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.013415032997727394, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008553827181458473, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.1997700184583664, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.1811758577823639, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17424830794334412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15532195568084717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.0909508690237999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08431386202573776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10488510131835938, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.096529021859169, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09303461760282516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07938168942928314, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.0748893991112709, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.053262241184711456, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04622379690408707, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04374800622463226, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04315849393606186, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02674761414527893, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.023161860182881355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.0229010209441185, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02096748724579811, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02059568651020527, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.014615328051149845, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015139801427721977, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013863532803952694, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011036480776965618, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06237264722585678, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.0580044686794281, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.05632517859339714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05087105184793472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.02886323630809784, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0271103847771883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.032738715410232544, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.030291074886918068, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.02930840477347374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02567099779844284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.024486763402819633, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.016616156324744225, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.014479070901870728, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.013841642066836357, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01369033008813858, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.008310073055326939, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007192314602434635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007131633814424276, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.006558314431458712, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.006498238537460566, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0043727802112698555, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004498473834246397, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.00415824493393302, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003086886601522565, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.05361756682395935, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.04974851384758949, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.04787031188607216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.043080393224954605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.02452857978641987, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.022839777171611786, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.028479233384132385, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.02635148912668228, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.02491431124508381, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.021787036210298538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.020821895450353622, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.014371571131050587, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.012552361935377121, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.011737782508134842, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01154258195310831, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.007184620015323162, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.006045988295227289, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.005958580877631903, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.005496955011039972, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.00540125509724021, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0037577669136226177, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.003759814193472266, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.003503922838717699, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0024618864990770817, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.17722095549106598, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.16589803993701935, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1622830480337143, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.14708144962787628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.08221990615129471, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.0779740959405899, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.09199652820825577, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.08467359095811844, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.08332568407058716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.07359549403190613, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07026464492082596, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.046576257795095444, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04027728736400604, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.03921012207865715, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.03893674165010452, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.023237330839037895, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.01982194371521473, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.019742896780371666, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.018028154969215393, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.017963973805308342, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.011885871179401875, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.011485271155834198, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.01143426913768053, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.00691748782992363, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.16643692553043365, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.145542711019516, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.13781821727752686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.11829058825969696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07595822215080261, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.06755178421735764, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08963029086589813, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08218684047460556, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07830452919006348, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06198008358478546, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05814780294895172, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04622012749314308, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03985656052827835, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03704576939344406, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03636927530169487, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.023424485698342323, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.020199991762638092, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01996191404759884, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.017626408487558365, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.017301881685853004, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013282532803714275, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014017215929925442, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012448750436306, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010862201452255249, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1586354672908783, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1498234122991562, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1471605896949768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.13467204570770264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07382962107658386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07068777084350586, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08152572065591812, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07540977001190186, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07465623319149017, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06701932102441788, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0645030066370964, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04144715517759323, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.036005932837724686, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.035293322056531906, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03513047844171524, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02070617489516735, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.018232259899377823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01818442903459072, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.016870304942131042, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01685125194489956, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.010966084897518158, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01108990702778101, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010738980956375599, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007537263445556164, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.20713165402412415, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1956600695848465, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.19225884974002838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.17596498131752014, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09639260917901993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09232395887374878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10634029656648636, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09843230247497559, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09744996577501297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08751533180475235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08411134779453278, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05396271124482155, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.046892061829566956, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04598250985145569, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04576921835541725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.026922350749373436, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.023430176079273224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.023369736969470978, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02161482349038124, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.021587537601590157, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014010509476065636, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.013764675706624985, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.013709858059883118, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008720343001186848, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.19768474996089935, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.17867977917194366, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1715143322944641, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15304844081401825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.0899132564663887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08310120552778244, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10412123054265976, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09573163837194443, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09207119792699814, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07831060141324997, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07395469397306442, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.052839234471321106, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.045839495956897736, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04326760768890381, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.042646441608667374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.026583736762404442, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.022922154515981674, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02265079692006111, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.020719075575470924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020328441634774208, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.014598807319998741, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015017068013548851, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013812871649861336, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010946172289550304, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.05880766361951828, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.05472390353679657, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.05305233597755432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.04795225337147713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.027224384248256683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.025539109483361244, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.031027430668473244, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.028703881427645683, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.027637816965579987, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.024243008345365524, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.023145616054534912, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.01575472205877304, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.013723799958825111, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.013061461970210075, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.012903210707008839, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.00788026861846447, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.006797185633331537, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.006731607019901276, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.006206417456269264, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.006141113582998514, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0041638705879449844, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004275530111044645, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.003944915719330311, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0029470503795892, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.05157389119267464, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.047868985682725906, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.046016838401556015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.041432712227106094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.023582715541124344, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02190401591360569, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.02759643644094467, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.02551330253481865, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.023940997198224068, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.020968373864889145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.0200921930372715, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.013929290696978569, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.012147082947194576, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.01128201000392437, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.011074719950556755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.006967029068619013, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.005817029625177383, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0057184575125575066, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.005291284993290901, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.005189760122448206, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0036519605200737715, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.003633776679635048, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.003378009656444192, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0023756709415465593, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.18273979425430298, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.17094996571540833, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.16726501286029816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1514120250940323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.08462531864643097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08031058311462402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.09469197690486908, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.08702408522367477, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.0857314020395279, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.07567355036735535, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07223326712846756, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.04795493930578232, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04141842573881149, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04032418504357338, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04007091745734215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.023921800777316093, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02041415497660637, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.020328842103481293, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.018574262037873268, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.01850188337266445, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.012248736806213856, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.011841682717204094, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.01178868766874075, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007165297865867615, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1771276593208313, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.15368090569972992, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1450388878583908, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.12539882957935333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08089610934257507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07094897329807281, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09498073160648346, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08748815953731537, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08321615308523178, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06537728756666183, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.06124218553304672, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04874778538942337, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.042088832706213, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03910539299249649, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03837420418858528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.02441953867673874, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.020769786089658737, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.0205003023147583, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.017806226387619972, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.017450079321861267, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013297618366777897, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013792602345347404, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012364665046334267, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010050470940768719, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1440834403038025, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.13606740534305573, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.13364392518997192, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.12231983244419098, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.067061647772789, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.06420587748289108, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.07401686161756516, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.06850314885377884, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.06780870258808136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06086916849017143, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.058572035282850266, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.03766544535756111, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.032729726284742355, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03208670765161514, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.031933341175317764, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.018808513879776, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.016627443954348564, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.016583560034632683, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.015395520254969597, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.015377840958535671, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.009982679039239883, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.010193156078457832, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.009771805256605148, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007020263001322746, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1995900422334671, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1885368973016739, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.18525105714797974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.16953590512275696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09284887462854385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08888966590166092, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10242421925067902, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09481967240571976, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09386517852544785, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08425389230251312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.081000916659832, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05201583355665207, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04516712948679924, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.044284455478191376, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04407735541462898, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.0259249284863472, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02257387526333332, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.022516421973705292, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.020822227001190186, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02079627849161625, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.013509765267372131, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.013278781436383724, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.013219005428254604, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008434385061264038, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.1966218650341034, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.17774498462677002, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17067353427410126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15212400257587433, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.08935004472732544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08261579275131226, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10345903038978577, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.0950920432806015, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09146276861429214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07775735855102539, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07342653721570969, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05242520570755005, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04550357162952423, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04296710714697838, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.042357273399829865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02639053575694561, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.022744711488485336, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02247565984725952, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.020543428137898445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020157070830464363, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.014491827227175236, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.01487478706985712, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013710054568946362, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010817673057317734, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06189017370343208, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.057811275124549866, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.0562230683863163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.050952643156051636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.02872452139854431, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.02708934061229229, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0324755497276783, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.030066952109336853, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.029128435999155045, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02568921074271202, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.024565506726503372, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.016479093581438065, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.014360593631863594, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01376462820917368, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01362056564539671, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.008241072297096252, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007124988827854395, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007069263141602278, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.006519187707453966, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.006464774254709482, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004331665113568306, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0044139414094388485, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004136198200285435, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0029933080077171326, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.05487309768795967, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.051152586936950684, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.049459319561719894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.0447087287902832, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.02521054446697235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02358745038509369, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.029081210494041443, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.026881523430347443, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.025563184171915054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.022528816014528275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.021568994969129562, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.014656683430075645, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.012805450707674026, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.012045187875628471, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.011867403984069824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.007329014595597982, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.006176584865897894, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.006096554920077324, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.005633409135043621, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.00555145600810647, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0038243269082158804, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0037807412445545197, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.003595401532948017, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0024315330665558577, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.18621760606765747, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.17449791729450226, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.17097240686416626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.15507568418979645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.08631375432014465, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08202561736106873, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.09633079171180725, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0885668471455574, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.08744306117296219, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.07740966975688934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07398835569620132, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.048732686787843704, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.042165882885456085, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.041133150458335876, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.040864888578653336, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02429807372391224, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.020769082009792328, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02069874294102192, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.018926197662949562, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.01887921430170536, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.01241401955485344, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.011964063160121441, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.011977721937000751, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007142769638448954, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.18595153093338013, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.15708620846271515, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.14737506210803986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.11923424899578094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0839645117521286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07272520661354065, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09863763302564621, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09090715646743774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08730628341436386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06446673721075058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05715794116258621, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.0505693219602108, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04376628249883652, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.040637075901031494, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.0398782379925251, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.025396252050995827, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.021660668775439262, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.02143322303891182, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01797438971698284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.017645658925175667, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013887008652091026, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014481795951724052, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012919371947646141, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.01065460778772831, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.153945654630661, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.14528602361679077, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.14269933104515076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1306006759405136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07179392874240875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.06870996206998825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.07927580177783966, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07333667576313019, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07259466499090195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06513256579637527, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06269802153110504, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04040572792291641, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03512474149465561, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03443312644958496, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03427291661500931, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.0202021524310112, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01798931509256363, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.017942683771252632, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01667933166027069, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.016657602041959763, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.010824058204889297, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011235899291932583, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010604418814182281, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007986066862940788, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.20766036212444305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.196054145693779, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.19261232018470764, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1761924773454666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.0967029258608818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09256114810705185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10659021139144897, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09876775741577148, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09778720140457153, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08771033585071564, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0842362716794014, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.054082680493593216, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.0470476895570755, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04612032696604729, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04590005427598953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02696065790951252, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.023492304608225822, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.023433154448866844, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02164822444319725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.021621564403176308, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.013973296619951725, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01378455851227045, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.013663225807249546, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00871574692428112, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.20278498530387878, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18359418213367462, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17637071013450623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15728457272052765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09230080246925354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08534519374370575, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10665919631719589, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09809260070323944, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09441061317920685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08028499782085419, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.0759144201874733, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05416347458958626, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.046889252960681915, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.044320520013570786, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04370897263288498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.027134865522384644, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.023340076208114624, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023066446185112, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02103501372039318, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020644115284085274, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.014692917466163635, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015097771771252155, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013889689929783344, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010813025757670403, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06691201776266098, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06239299103617668, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.060630835592746735, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05491800978779793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.031040390953421593, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.029202822595834732, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.035181280225515366, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.032563019543886185, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03149363398551941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02772047556936741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.026477258652448654, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.017846107482910156, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.015554447658360004, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.014869023114442825, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.014707292430102825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.008918466046452522, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007688641082495451, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007623187266290188, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007021473720669746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.006957868114113808, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004677930846810341, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004751897417008877, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004455667920410633, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0031935006845742464, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.05941333249211311, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05525900050997734, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05323079973459244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.04812648147344589, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.027253778651356697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.025417422875761986, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03158342465758324, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.029285956174135208, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.027656031772494316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.024314407259225845, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.023291299119591713, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.015919851139187813, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01392286829650402, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.013036889024078846, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.012820604257285595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.007961851544678211, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0067001101560890675, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.006595665588974953, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006108324974775314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006004081107676029, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004161893855780363, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004138248041272163, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.003885742509737611, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0026748920790851116, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.19485965371131897, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18262843787670135, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1787867546081543, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16238580644130707, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0904998779296875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08607979118824005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10110291093587875, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09299013018608093, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09169113636016846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08124588429927826, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07764195650815964, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.051238756626844406, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.044262245297431946, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.043172236531972885, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04290815815329552, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02554897964000702, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02181306853890419, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02172962948679924, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.019883230328559875, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.019833499565720558, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013070118613541126, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012581619434058666, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012607856653630733, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007533928379416466, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.18154709041118622, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.15805423259735107, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1501697301864624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.1252017766237259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08238862454891205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07309985160827637, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09563235938549042, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0882178321480751, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08503018319606781, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06654549390077591, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05908112972974777, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04890380799770355, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04239076003432274, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03975359722971916, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.039124831557273865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.024584881961345673, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.02106626145541668, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.020867129787802696, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.018028823658823967, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.017772361636161804, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013416982255876064, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013845723122358322, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012609930709004402, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010050054639577866, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.15813763439655304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.14927606284618378, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.14658787846565247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1341564655303955, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07379864156246185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07061898708343506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08155643939971924, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07539359480142593, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07462378591299057, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06692832708358765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06439026445150375, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.041538022458553314, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.0360528901219368, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.0353398434817791, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03516995906829834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.0207577683031559, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.018330775201320648, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.018283482640981674, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01696743816137314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.016943546012043953, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011043500155210495, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011264201253652573, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010809015482664108, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.0077856783755123615, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21640069782733917, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2043348252773285, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.2007526159286499, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18366803228855133, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10092146694660187, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09660089015960693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11129295825958252, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10305577516555786, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.1020527109503746, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09153230488300323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08786831051111221, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05644840747117996, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.049104075878858566, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04813675582408905, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04790840670466423, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.028149142861366272, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024509327486157417, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.024445585906505585, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022583015263080597, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02255057729780674, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014586499892175198, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01436650287359953, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01426580362021923, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.0090534882619977, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21189278364181519, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19119805097579956, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1834499090909958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1632571667432785, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09622499346733093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08877983689308167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11127734929323196, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.1023779958486557, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09857238829135895, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08346275240182877, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07875868678092957, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.056397952139377594, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.048878263682127, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04615796357393265, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04550457373261452, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028288625180721283, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02420148253440857, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023922497406601906, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02173951268196106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02132861688733101, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015306842513382435, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015535992570221424, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014458626508712769, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010984066873788834, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06919442117214203, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06459344923496246, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06279599666595459, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05686492100358009, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03208433836698532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.030238939449191093, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.036286789923906326, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03360004723072052, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03254159912467003, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02867063879966736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.027398141101002693, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.018386071547865868, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.016030391678214073, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01535656675696373, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01519690826535225, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009186443872749805, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007912696339190006, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.00784633494913578, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007223630324006081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0071621909737586975, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004794783890247345, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004842176102101803, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0045706103555858135, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0032039156649261713, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.061806779354810715, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.0575612410902977, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05561034008860588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05026157945394516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.028369303792715073, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.026549722999334335, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03267388045787811, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.030289582908153534, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.02879464253783226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02533860318362713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.024247922003269196, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.016496283933520317, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01443145889788866, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.013573835603892803, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01336509920656681, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008243517950177193, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.006966087967157364, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.006870361976325512, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006348654627799988, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006252260413020849, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004304929636418819, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004275314509868622, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004039891064167023, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002751382766291499, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.1992369294166565, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1864997148513794, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18263676762580872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16569288074970245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09237837046384811, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08783043920993805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10317404568195343, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09480385482311249, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09357841312885284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0828147754073143, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07908854633569717, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.052263207733631134, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04511693865060806, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.044037558138370514, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.043757759034633636, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02604379691183567, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0222516767680645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.022179897874593735, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020263193175196648, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020208396017551422, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013323339633643627, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012832646258175373, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012853539548814297, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007690847385674715, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.19009357690811157, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.16964305937290192, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1625254899263382, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.13592319190502167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08734709024429321, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07928818464279175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.10019844770431519, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09252744913101196, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.0892103761434555, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.07140319049358368, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.0643390417098999, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.05122340843081474, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04442872107028961, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.04207970201969147, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.0415058396756649, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.025649812072515488, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.02217756398022175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.02197159454226494, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.019084034487605095, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01888202503323555, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013858330436050892, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014354105107486248, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.013122362084686756, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010312030091881752, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.16292481124401093, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1538202464580536, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1510823667049408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.13809463381767273, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07612982392311096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07285040616989136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08409778773784637, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07780837267637253, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07700980454683304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06903533637523651, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06632398068904877, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04281516373157501, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03722246736288071, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.036481134593486786, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.0363050177693367, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02140486240386963, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.018929287791252136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.018879732117056847, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.017517894506454468, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.017491687089204788, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011383051052689552, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011641702614724636, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011139951646327972, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008063389919698238, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.2177683711051941, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20557935535907745, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20198450982570648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18474377691745758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10164213180541992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09724615514278412, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11204130947589874, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10378693044185638, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10277186334133148, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09211011230945587, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08839929848909378, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05684259533882141, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04945503547787666, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04847635328769684, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04824351519346237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02834765426814556, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024652021005749702, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.024588514119386673, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.0226982943713665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022669151425361633, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014676588587462902, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014407468028366566, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014350335113704205, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009018204174935818, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21286840736865997, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.1921847015619278, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18440526723861694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16416552662849426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09685231000185013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08938127011060715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11200336366891861, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10310720652341843, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09920746833086014, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08410285413265228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07934758067131042, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.056896552443504333, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04939216747879982, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04660485312342644, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.0459291972219944, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028621558099985123, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02468813583254814, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02439248189330101, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.022259708493947983, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021840814501047134, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015592913143336773, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.01616906002163887, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014715952798724174, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011782118119299412, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06952497363090515, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.0648745521903038, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06308366358280182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05712999403476715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03226831927895546, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03040015697479248, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03648088127374649, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0337848998606205, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.032745085656642914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.028821710497140884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.027529222890734673, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.018488464877009392, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.016129065304994583, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.015449956990778446, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.015288706868886948, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009245289489626884, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007964420132339, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007900222204625607, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007272087968885899, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007206708192825317, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004833214450627565, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004885348957031965, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004607994109392166, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003245297819375992, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.05861210823059082, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05453136935830116, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05257655680179596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.04751390218734741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.02692844718694687, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.025110676884651184, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.031182995066046715, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.028903640806674957, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.02734185941517353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02401757426559925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.022975925356149673, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.015729956328868866, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.013782025314867496, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.012880844995379448, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.012664247304201126, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.007863182574510574, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.006634484976530075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.006532224360853434, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006045596208423376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0059416331350803375, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004118693061172962, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004115115385502577, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0038486698176711798, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002680624835193157, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.21217675507068634, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1988516002893448, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.19498643279075623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1770038902759552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09880348294973373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.0940229520201683, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.11034195870161057, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.10136155039072037, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.10012117028236389, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08864022046327591, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08483153581619263, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05585891753435135, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04827335476875305, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.047122810035943985, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04685326665639877, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02784830331802368, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02379455789923668, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023712430149316788, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02167275734245777, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.021616177633404732, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014231705106794834, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013666899874806404, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013734661974012852, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008111080154776573, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1710790991783142, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1408291757106781, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1301717907190323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.107075035572052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07653875648975372, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.06441807746887207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09193224459886551, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08442355692386627, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08051875233650208, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05806002393364906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.053613997995853424, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04723804444074631, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04064418375492096, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03705693036317825, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03617895767092705, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.02369203418493271, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.019799349829554558, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01955355331301689, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.016318168491125107, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01584339141845703, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.012987134978175163, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013391530141234398, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.011865793727338314, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.009815698489546776, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.16483663022518158, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1555926352739334, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.15279904007911682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1397506445646286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07706844061613083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07373341172933578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08507104963064194, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07874303311109543, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07793739438056946, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06986374408006668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0671168714761734, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04328407719731331, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.037672992795705795, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03692047670483589, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03674379363656044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.021620769053697586, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01915409229695797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.019107917323708534, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.017722653225064278, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.017697501927614212, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011448612436652184, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011783652007579803, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011203140951693058, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008150911889970303, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21932868659496307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20702916383743286, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20341576635837555, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18606457114219666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10243146866559982, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09800480306148529, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.1129496842622757, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10462864488363266, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10359891504049301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09285251796245575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08911430090665817, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05735775828361511, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04987028241157532, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04888671264052391, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04865425080060959, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.028603335842490196, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024897169321775436, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02483341656625271, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02293088100850582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02290012501180172, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014845043420791626, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014605299569666386, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01451475452631712, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00921961385756731, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2124677151441574, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19174592196941376, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18379107117652893, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16387611627578735, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09654324501752853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08898936957120895, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11218103021383286, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10306286066770554, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.0989590510725975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08384484052658081, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07925146818161011, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.056927505880594254, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04924189671874046, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04636194184422493, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04565756767988205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028615677729249, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024381710216403008, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.024077393114566803, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021938791498541832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02150057442486286, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015620962716639042, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.01577048748731613, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014716334640979767, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011231622658669949, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07346747070550919, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.0686689093708992, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06685293465852737, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0606020912528038, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.034120552241802216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0322050042450428, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03847694769501686, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03563172370195389, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03460050746798515, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.030532632023096085, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.02917763963341713, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.019496683031320572, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017014019191265106, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.016331303864717484, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01617000438272953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009749307297170162, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008415649645030499, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008351361379027367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007690763100981712, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007630760315805674, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005092344246804714, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005148680880665779, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004866552073508501, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0034178809728473425, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06596703827381134, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.061532169580459595, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05951637402176857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05377674102783203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.030338089913129807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.0284251868724823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.034859172999858856, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03231656178832054, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.030752386897802353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02712136320769787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.025950361043214798, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01758134737610817, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015377693809568882, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014507348649203777, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014296727254986763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008785378187894821, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007435454986989498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0073423245921730995, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006788937374949455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006688721477985382, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004583823960274458, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004549379926174879, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004311360884457827, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002918386599048972, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20153626799583435, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18888743221759796, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18503336608409882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16798235476016998, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0935201421380043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08890584856271744, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10440903902053833, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0960024893283844, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09470323473215103, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08389069139957428, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08016645908355713, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05283705145120621, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04567767679691315, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04457110911607742, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04430917277932167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.026334352791309357, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02255145087838173, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.022470947355031967, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020560214295983315, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020503448322415352, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.01348880399018526, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013039886951446533, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013026258908212185, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007860956713557243, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.19389747083187103, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.16999657452106476, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.16180793941020966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.1358940303325653, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08767405152320862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0788951963186264, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.10260628908872604, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09445447474718094, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.09098219126462936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.07193198800086975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.06547784060239792, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.052560701966285706, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04540202021598816, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.04235268756747246, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.04161415994167328, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.026346547529101372, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.02250472828745842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.02228626422584057, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.019492218270897865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01917964033782482, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.014365683309733868, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014918939210474491, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.013417646288871765, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010900696739554405, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17208217084407806, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1624014973640442, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.15948748588562012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14587122201919556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08046776801347733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07698418945074081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08887359499931335, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08222165703773499, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08137191832065582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07291465252637863, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07011142373085022, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04527278617024422, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03934508189558983, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03856099024415016, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.038374487310647964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022613242268562317, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020019225776195526, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01996632292866707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018515009433031082, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018491744995117188, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012014328502118587, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012325136922299862, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01175982877612114, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008551593869924545, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22306998074054718, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.21052584052085876, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20681920647621155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18914617598056793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10418002307415009, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09968177229166031, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11489260196685791, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10640855878591537, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10535778850317001, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09440047293901443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09055713564157486, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.058307573199272156, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.050712522119283676, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04970454052090645, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04946655407547951, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.029098596423864365, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025290384888648987, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025226231664419174, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02328510954976082, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023252492770552635, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.015100544318556786, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014798992313444614, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01476321741938591, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009285896085202694, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21817459166049957, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19682806730270386, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18865172564983368, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16824449598789215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09919199347496033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.09143625199794769, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11525411158800125, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10585881769657135, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.10161040723323822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08614660799503326, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.0814485251903534, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05837560072541237, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.050578538328409195, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04762120172381401, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.046908408403396606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.029325805604457855, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02503250725567341, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02471742033958435, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02252608723938465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02207322046160698, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015905652195215225, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.016167355701327324, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014960472472012043, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011492478661239147, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07204123586416245, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06739542633295059, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06557357311248779, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05950874462723732, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.033501699566841125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03162377327680588, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03785541281104088, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03502969443798065, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.033968403935432434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03002293035387993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.028719943016767502, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.01919541507959366, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.016733668744564056, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.016048425808548927, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01588316634297371, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.00959415826946497, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008280587382614613, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008214200846850872, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.00757931312546134, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007517271209508181, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005025430116802454, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005087913013994694, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0048028696328401566, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0033988882787525654, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06424945592880249, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05997302755713463, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05796774849295616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05244386941194534, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0295933336019516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.027713533490896225, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.034030891954898834, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03153694048523903, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.030024759471416473, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.026449134573340416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02535826899111271, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01717379502952099, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015036331489682198, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014157917350530624, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.013943797908723354, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008592016994953156, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0072700646705925465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.00716931140050292, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006640543695539236, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006537703797221184, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004492604173719883, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004465489182621241, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004225966054946184, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0028842431493103504, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.21997271478176117, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.20638898015022278, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.2022790163755417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.18379949033260345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.10240811109542847, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09749395400285721, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.11427441984415054, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.10502998530864716, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.10372263938188553, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.09199681878089905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08807378262281418, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05789368972182274, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.050012096762657166, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04883073270320892, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.048551008105278015, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.028880944475531578, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02466363087296486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02458418905735016, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02249784767627716, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.022440675646066666, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014762645587325096, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.014198154211044312, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.014259159564971924, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008468183688819408, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1861025094985962, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.16149267554283142, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.15234774351119995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.1273140162229538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08456258475780487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07483585178852081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.10023023933172226, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09202336519956589, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.0872730165719986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06758534908294678, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.06202103942632675, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.05139283090829849, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.044324927031993866, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.04091513156890869, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.04008641093969345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.02586924470961094, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.021881509572267532, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.021579625084996223, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.018674949184060097, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01826433464884758, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.014235740527510643, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014733186922967434, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.013180774636566639, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010868166573345661, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.16933758556842804, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1598949134349823, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.15703751146793365, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1436672955751419, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07922511547803879, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07582473754882812, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08748174458742142, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.0809665396809578, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08014226704835892, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07182013988494873, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06903160363435745, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.0445479080080986, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03874628245830536, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03797139227390289, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03778258338570595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022255726158618927, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01971498318016529, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01966368965804577, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018244897946715355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01821979507803917, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011819596402347088, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012151837348937988, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011568417772650719, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008435796946287155, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21951431035995483, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2073010951280594, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20366375148296356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18630602955818176, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10257887095212936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09818105399608612, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.1130908951163292, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10477558523416519, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10372902452945709, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09298889338970184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08922290056943893, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05737313628196716, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.049933500587940216, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.0489434152841568, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04870947450399399, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.028609856963157654, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02490116097033024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.024832764640450478, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022928612306714058, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02289777807891369, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014799021184444427, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01456416118890047, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014471141621470451, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009127349592745304, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21102702617645264, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.1902378648519516, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18202722072601318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16228483617305756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09589199721813202, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.0881928950548172, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11211393028497696, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10277185589075089, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09829623252153397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08318124711513519, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07869081199169159, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05673200264573097, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04904954507946968, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04601401090621948, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04527311027050018, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028549570590257645, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024126989766955376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023804225027561188, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021674441173672676, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021210985258221626, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015626253560185432, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015543743036687374, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014682450331747532, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010945271700620651, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.0707201212644577, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06626570224761963, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06457439810037613, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05868087708950043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.032907694578170776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.031137270852923393, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.037078872323036194, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.034306302666664124, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03333299234509468, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0295448899269104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.028287753462791443, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.018788861110806465, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.016372667625546455, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.015747684985399246, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.015599590726196766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009392336942255497, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008105145767331123, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.00804580096155405, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007426745258271694, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0073723685927689075, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004904561210423708, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.00494353985413909, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004703021142631769, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0032683273311704397, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06605231761932373, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06178088113665581, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05985821411013603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05426651984453201, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03045416995882988, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02864903211593628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03479374945163727, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03222484514117241, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.030866792425513268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02730175293982029, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.026112958788871765, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.017553802579641342, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015340609475970268, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014550608582794666, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014358826912939548, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008770945481956005, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007428609300404787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007342434022575617, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.00678524374961853, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0066980887204408646, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004554810933768749, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004482822027057409, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004312567878514528, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0028136521577835083, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.21905617415905, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.20591336488723755, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.2020014226436615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.18385936319828033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.10223188996315002, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09742777794599533, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.1139039695262909, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.10471808165311813, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.10351824760437012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.09205326437950134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.0882219448685646, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05770661681890488, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.049888934940099716, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.048766400665044785, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04848353564739227, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02877524122595787, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.024635065346956253, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.024553384631872177, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02249867096543312, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02246299386024475, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014709875918924809, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.014164606109261513, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.014237678609788418, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008461764082312584, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.17618107795715332, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1498621106147766, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.13803443312644958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.11394976079463959, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08011951297521591, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.06840107589960098, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09801793098449707, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09061875939369202, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08277014642953873, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06254806369543076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.056995850056409836, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.050234511494636536, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.043642282485961914, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03888429328799248, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03769920766353607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.025194760411977768, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.020958492532372475, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.02047092095017433, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.017695685848593712, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.017057687044143677, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013760068453848362, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014542687684297562, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012302268296480179, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.01077541708946228, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.16766603291034698, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1583239883184433, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.15550187230110168, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14232711493968964, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07848035544157028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07510633021593094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08659831434488297, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08018404245376587, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07937122881412506, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07118388265371323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06839118897914886, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.0440988764166832, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03839946538209915, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03763217851519585, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03744949400424957, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022030232474207878, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.019581716507673264, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.019529422745108604, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018134649842977524, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01811092160642147, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011704781092703342, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012125285342335701, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011457795277237892, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008492725901305676, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21849781274795532, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2063986212015152, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20274965465068817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1855849325656891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.1020757406949997, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09770682454109192, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11253893375396729, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10426974296569824, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10322662442922592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09257948398590088, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08885600417852402, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05710302293300629, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04969751834869385, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.048710860311985016, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.048476655036211014, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.028481900691986084, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.0248003788292408, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.024734798818826675, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02285282500088215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022820131853222847, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.0147508904337883, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014539841562509537, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014421502128243446, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00916022714227438, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2096627652645111, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18865355849266052, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18017208576202393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16051453351974487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09530297666788101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08735568821430206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.1111840009689331, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10253526270389557, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09775619208812714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.0825170949101448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07801448553800583, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.0564117357134819, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.0490163154900074, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04578481987118721, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04499224200844765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028336884453892708, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.0240729209035635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023718301206827164, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021607166156172752, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02110661007463932, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015298198908567429, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015621776692569256, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014277790673077106, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011060819961130619, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06654854118824005, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.062332458794116974, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.0606839694082737, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0551157183945179, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03092017024755478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.029225924983620644, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0349201001226902, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.032308973371982574, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03133415803313255, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02775985188782215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.026582200080156326, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.017702363431453705, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.015424574725329876, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.014804814010858536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.014656680636107922, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.00885206088423729, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007634314242750406, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007573192473500967, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0069967759773135185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.006938280072063208, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004636533558368683, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004676958080381155, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004435807932168245, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0031110348645597696, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06738607585430145, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06297990679740906, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06103832647204399, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05525679141283035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.031045762822031975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02918960712850094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.035490021109580994, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03289973363280296, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03147068992257118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.027815507724881172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.026634465903043747, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01791328936815262, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.0156563613563776, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014828256331384182, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014636265113949776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008951154537498951, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007591915782541037, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.00750567065551877, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0069391257129609585, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0068517220206558704, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004660948645323515, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0046148356050252914, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004407927393913269, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0029462375678122044, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20397239923477173, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19141334295272827, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1875607818365097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.17059314250946045, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09494977444410324, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09037220478057861, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.1059197261929512, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09748225659132004, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.0961589440703392, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08538779616355896, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.0817696824669838, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05364109203219414, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04644697532057762, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04528240114450455, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.0450163334608078, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.026755832135677338, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.022917132824659348, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02283930778503418, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02092934399843216, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020859817042946815, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.01370256021618843, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013268877752125263, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013225872069597244, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008020443841814995, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1901124119758606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1624545007944107, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1531088501214981, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.12129675596952438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0865616500377655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07595237344503403, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.10102809965610504, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09284346550703049, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08918055146932602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06653354316949844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05790979042649269, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.05194893106818199, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.044911593198776245, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.04212892800569534, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.04144934564828873, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.026174386963248253, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.022871527820825577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.022650500759482384, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01919536106288433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.018926851451396942, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.014617408625781536, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01569783128798008, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.01378337200731039, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.012112540192902088, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.16802914440631866, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.15875127911567688, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.155951127409935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14275947213172913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07859569787979126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07524315267801285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.0867382138967514, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08032958209514618, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07947611063718796, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07132828235626221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.06860662251710892, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04414001852273941, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03843506798148155, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03765977546572685, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03747746720910072, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022040724754333496, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01955370418727398, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.019505001604557037, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018112769350409508, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01808762736618519, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011671040207147598, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012051780708134174, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011420358903706074, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00837127398699522, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21584947407245636, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20399215817451477, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20042410492897034, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18350793421268463, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10078779608011246, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09650591760873795, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11121907085180283, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10298062860965729, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10192368924617767, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09149502217769623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08791465312242508, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.056378576904535294, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.049085330218076706, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.048106204718351364, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.0478733666241169, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02819725126028061, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024521391838788986, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02445732243359089, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02261289395391941, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022580834105610847, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01473119668662548, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014413540251553059, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014412114396691322, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009140457026660442, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.20962227880954742, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.187971293926239, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1786511391401291, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1592179238796234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.0953616052865982, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08687151968479156, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11387716978788376, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10375648736953735, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09796564280986786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08250775188207626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07835114002227783, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.057954512536525726, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.049985844641923904, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04615247994661331, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04518864303827286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02967206947505474, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024826250970363617, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.024402013048529625, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.0224370826035738, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021861858665943146, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.016952846199274063, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.01689370907843113, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.01577252894639969, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.012687183916568756, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.0736672431230545, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06903738528490067, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06720049679279327, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.061118267476558685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03429190069437027, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03239291533827782, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.038791920989751816, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03586568310856819, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03475084900856018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03081425651907921, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.029556747525930405, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.019669972360134125, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.01715032570064068, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.016434917226433754, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.016262823715806007, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009841405786573887, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008506170473992825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.00843483954668045, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.00781137403100729, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007745020557194948, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005169123411178589, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005265046376734972, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.00493954261764884, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003561567049473524, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.0641876757144928, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05999795347452164, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.0578632690012455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05245015025138855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.02955968677997589, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.027628449723124504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03420054167509079, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0317918136715889, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.02996538206934929, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.026516156271100044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.025459174066781998, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.017279058694839478, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015166259370744228, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014150175265967846, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01390541810542345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008641744963824749, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007282700855284929, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007164475508034229, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006669223308563232, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006548542995005846, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004535720217972994, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004520372953265905, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0042342557571828365, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0029406538233160973, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.2123454213142395, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19947944581508636, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.19561044871807098, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.17814864218235016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09898114949464798, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09424282610416412, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.11041960120201111, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.1015445813536644, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.10019338876008987, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08908086270093918, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08537330478429794, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05592332035303116, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.048356570303440094, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04715032875537872, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04689354822039604, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.027889864519238472, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.023828357458114624, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023729728534817696, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02176196686923504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02170414663851261, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014240561053156853, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0137028768658638, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013748292811214924, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008135639131069183, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.19067837297916412, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.15699255466461182, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1456015408039093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.11761635541915894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08548922091722488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0719447210431099, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.10117799043655396, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09301801770925522, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08922356367111206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06388868391513824, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05733804777264595, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.0517360158264637, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.044602684676647186, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.041221123188734055, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.04039335250854492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.026045653969049454, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.021786516532301903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.02154546231031418, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.017670415341854095, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0172665324062109, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.014288343489170074, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014363662339746952, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.01325629185885191, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010279289446771145, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1729351133108139, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16342300176620483, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16056033968925476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1471022367477417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08080865442752838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07738487422466278, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08918413519859314, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08255468308925629, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08171682804822922, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.073371022939682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07057549059391022, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04532482475042343, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03943128138780594, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03864714503288269, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03846783936023712, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022630073130130768, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.019926147535443306, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.019876951351761818, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018430374562740326, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01840190216898918, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011915735900402069, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012068085372447968, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011659599840641022, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008130095899105072, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21879827976226807, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20677395164966583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.2031814157962799, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18607980012893677, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.1021495908498764, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09782880544662476, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11256349831819534, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10430532693862915, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10326752066612244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09274706244468689, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08905965089797974, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05711102485656738, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.049701131880283356, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04872897267341614, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.048496562987565994, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02847297489643097, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024773113429546356, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02470872551202774, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022838521748781204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022808631882071495, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014719262719154358, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01446572132408619, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014396349899470806, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009040802717208862, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2117481529712677, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18999862670898438, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18117699027061462, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16167856752872467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09610231220722198, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08794153481721878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11287913471460342, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.1037115752696991, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09869809448719025, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08316852152347565, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07881446182727814, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.057215169072151184, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04950831085443497, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04614425078034401, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04532413184642792, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02875090390443802, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02422310598194599, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02386046200990677, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021730268374085426, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02121542952954769, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015566240064799786, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.0156843438744545, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014497574418783188, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011043268255889416, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07024478912353516, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06585010886192322, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06404494494199753, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05825494974851608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03270391374826431, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03088020719587803, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03712495043873787, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0343225933611393, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.0331413708627224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.029401741921901703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.028219223022460938, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.018845317885279655, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.016405407339334488, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.015685416758060455, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.015514467842876911, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.00942945946007967, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008132638409733772, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.00806073471903801, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007473052479326725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007404672913253307, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0049674296751618385, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005060529336333275, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004734616260975599, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0034468688536435366, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06285786628723145, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05881211534142494, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05657978355884552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.051400501281023026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.02896224521100521, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02703455090522766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03370858356356621, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.031315647065639496, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.029346836730837822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.026015881448984146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.024994246661663055, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01698748767375946, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.014907993376255035, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.013852630741894245, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.013604466803371906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008493765257298946, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007114951498806477, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.006992946844547987, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006525692064315081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006395671050995588, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0044509535655379295, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004406151827424765, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004140186123549938, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002821176080033183, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.1991378664970398, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18721896409988403, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1834515929222107, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16713227331638336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09262259304523468, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08819587528705597, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10368365794420242, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09522556513547897, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09376418590545654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08344949036836624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08007483184337616, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.052520282566547394, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.0453270748257637, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04413950443267822, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04386596754193306, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02618739940226078, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02229359745979309, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02220369130373001, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020387154072523117, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02032429352402687, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013394378125667572, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.01285578403621912, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012875488959252834, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007655334658920765, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.20357851684093475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1649073213338852, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.15172144770622253, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.12631434202194214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.09107542783021927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07558678835630417, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.10884051024913788, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09976205229759216, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.09530611336231232, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06855572760105133, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.06253468245267868, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.055529624223709106, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04806293919682503, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.044144414365291595, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.043194323778152466, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.027954567223787308, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.02375667355954647, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.02347259223461151, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.019582871347665787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0190652497112751, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.015530157834291458, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01620190590620041, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.014356523752212524, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.012120718136429787, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17533312737941742, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16571027040481567, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16283336281776428, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1491069793701172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08191745728254318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07845298200845718, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09043139219284058, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08368387818336487, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08283649384975433, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07435232400894165, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07147665321826935, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.045974258333444595, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.039980754256248474, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03919370472431183, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03900888189673424, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022964436560869217, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020240742713212967, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02018800377845764, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018724795430898666, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01869870536029339, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012117922306060791, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012305035255849361, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011866864748299122, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008345208130776882, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22200873494148254, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20986127853393555, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20620988309383392, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18881091475486755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10369257628917694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09929881244897842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11427070200443268, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10588549077510834, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10485338419675827, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09412868320941925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09041409939527512, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05799385905265808, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.05047810450196266, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.049492496997117996, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04925529658794403, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.028932221233844757, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025233842432498932, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025164838880300522, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023273631930351257, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02324305847287178, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.015013241209089756, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014840018004179, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014689965173602104, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009422468952834606, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21277789771556854, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19119003415107727, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18236884474754333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16284935176372528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09667489677667618, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08853327482938766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11329569667577744, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10426734387874603, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09924230724573135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.0837879478931427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07941745221614838, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.057459842413663864, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04980519786477089, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.046425480395555496, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04560442268848419, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02901678904891014, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024371540173888206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02400287799537182, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021883316338062286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02136288210749626, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015961162745952606, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015780022367835045, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014939305372536182, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011107399128377438, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.22.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07096942514181137, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06654626876115799, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06473150849342346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.058886606246232986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.033044178038835526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.031220437958836555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.037459846585989, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0346660353243351, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03348258137702942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.029706129804253578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.02852988801896572, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.018994862213730812, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.016557734459638596, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01583600789308548, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01566305384039879, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009499862790107727, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008188039995729923, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008115055970847607, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007520108483731747, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007448791526257992, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004987326450645924, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005057641305029392, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004759321920573711, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0033979262225329876, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.22.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06452056765556335, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06040358543395996, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05827413871884346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05288652703166008, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.029765039682388306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02788127399981022, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.034537170082330704, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.032030362635850906, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03019212931394577, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.026725320145487785, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.025678550824522972, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.017421506345272064, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015238012187182903, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014253329485654831, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014008031226694584, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008710701018571854, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007310046814382076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007192864548414946, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006696788128465414, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006580031011253595, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004554389044642448, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004502027295529842, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004261049907654524, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002887629671022296, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.22.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20749357342720032, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1949741244316101, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.19109860062599182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1739468276500702, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09664754569530487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09209205210208893, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10777658224105835, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09920933097600937, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09784458577632904, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08702528476715088, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08341348171234131, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05459505319595337, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04723953828215599, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04607009515166283, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.045786187052726746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.027219532057642937, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.023279551416635513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023191720247268677, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.021278953179717064, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02121165208518505, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013913113623857498, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013403613120317459, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013428261503577232, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007995521649718285, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.22.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.16280129551887512, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.13641417026519775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.12772658467292786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.10222908854484558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0726008340716362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.06346239894628525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08573083579540253, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0790414959192276, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07625497132539749, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.055485595017671585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.050395891070365906, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04378708451986313, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.037876881659030914, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03494035825133324, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03422864153981209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.021914485841989517, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.018394287675619125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.018213095143437386, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01510593295097351, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.014755623415112495, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.011810054071247578, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.012042135000228882, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.01089058630168438, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.008498079143464565, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.22.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1756303459405899, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1660352349281311, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16313737630844116, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1494293212890625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08211510628461838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07864286750555038, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09071860462427139, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08387220650911331, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08303499966859818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07457681000232697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07175926864147186, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.046159129589796066, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.040091875940561295, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.039301421493291855, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03911333531141281, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.023078959435224533, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020325684919953346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020275231450796127, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01881289854645729, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01878824457526207, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01225211936980486, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01239853911101818, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011996329762041569, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008461466059088707, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.22.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.2197279930114746, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2077324241399765, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20416247844696045, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18694917857646942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10264238715171814, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0983067974448204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11315419524908066, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10482184588909149, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.103785939514637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09319385886192322, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08956895023584366, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05744362249970436, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.049966827034950256, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04898481443524361, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.048751842230558395, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.028646400198340416, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02494228444993496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.024875495582818985, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02300156094133854, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02297241799533367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014864439144730568, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014620605856180191, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01454334706068039, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009218776598572731, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.22.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2093135267496109, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18748396635055542, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1783040314912796, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15952147543430328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09494911879301071, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08654846996068954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11188195645809174, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10311434417963028, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09768923372030258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.0822933167219162, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07799538224935532, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05651504173874855, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04928984493017197, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04564732313156128, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04475332424044609, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028293687850236893, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024020645767450333, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023616451770067215, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02158350683748722, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021014047786593437, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015102991834282875, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.0156744085252285, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013972722925245762, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011066959239542484, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.23.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06603747606277466, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.061899248510599136, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06012551486492157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05469905585050583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03079047054052353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0290130153298378, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03507765755057335, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03244749456644058, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.031190156936645508, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02767406404018402, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.026590777561068535, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.017825376242399216, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.015522831119596958, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.014782202430069447, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.014603368006646633, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.008944638073444366, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.00767777394503355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007600836455821991, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007055896334350109, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.00698078190907836, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004749934654682875, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004806152079254389, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004517195746302605, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003288642503321171, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.23.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06097154691815376, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05704647675156593, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.0548953078687191, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.04981492459774017, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.028133483603596687, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02626153826713562, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.032764531672000885, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.030414212495088577, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.02850925363600254, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.025258315727114677, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.024264691397547722, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01651185378432274, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01449626125395298, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.013468027114868164, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01322280801832676, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.00826213601976633, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0069272518157958984, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.006807100493460894, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006347117479890585, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0062247649766504765, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004338652826845646, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004303434398025274, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004038079176098108, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0027807410806417465, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.23.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20440275967121124, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19186218082904816, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1879359632730484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1712074875831604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09523320943117142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.0906151607632637, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10660664737224579, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09796298295259476, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.0963732972741127, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08567474037408829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08222611993551254, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.053965866565704346, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.0466698557138443, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04539214074611664, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04510916769504547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02691534347832203, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.022927582263946533, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02283327281475067, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02095513790845871, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02087690308690071, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013766990974545479, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013239811174571514, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013250332325696945, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007914735935628414, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.23.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.15793873369693756, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.13024887442588806, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1155797466635704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.09518962353467941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07024367898702621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05756809189915657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0930386409163475, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08482763916254044, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07402396202087402, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05437927320599556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.051238253712654114, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04761651158332825, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04120061919093132, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03469239920377731, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03299349173903465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.024232424795627594, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.019569678232073784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.018890922889113426, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.016834469512104988, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.015885384753346443, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.01387324184179306, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014801017008721828, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012007530778646469, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.01164170354604721, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.23.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17562811076641083, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16603679955005646, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16312971711158752, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14945226907730103, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08207982778549194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07861267030239105, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09067301452159882, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08384504914283752, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08299440890550613, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07452765852212906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07172757387161255, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04610661789774895, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04006465896964073, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03927299380302429, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03908655047416687, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.023036688566207886, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020267769694328308, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02021869271993637, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018754595890641212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018730325624346733, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012172423303127289, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012305856682360172, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01191476546227932, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008323939517140388, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.23.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21837836503982544, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20647110044956207, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20294761657714844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18586677312850952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.1020166277885437, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0977085679769516, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11248958855867386, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10417111217975616, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10312314331531525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09264113754034042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08902952820062637, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.057104334235191345, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04966164007782936, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.048686880618333817, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04845813661813736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02849297784268856, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024794960394501686, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.024733711034059525, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022873103618621826, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022844580933451653, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014809382148087025, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014544432051479816, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.0144923385232687, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00918849091976881, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.23.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.20880009233951569, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18770509958267212, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17882800102233887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1605115830898285, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09482131153345108, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08671511709690094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11138623207807541, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10274605453014374, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09740699082612991, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08253492414951324, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07848169654607773, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05638322979211807, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04900573566555977, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04547957703471184, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04460891708731651, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028317593038082123, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02373998984694481, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02334834448993206, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021347248926758766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020800884813070297, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015212955884635448, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015235095284879208, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014129308052361012, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010495360009372234, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.24.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06489382684230804, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.060865916311740875, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.059144679456949234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05385095253586769, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03021066077053547, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.028498413041234016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03435717523097992, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03180598467588425, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.030610108748078346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.027180828154087067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.02610466256737709, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.01739475131034851, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.015180938877165318, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.014469997957348824, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.014297880232334137, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.008702727034687996, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007466259878128767, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007394404616206884, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.006856491323560476, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.006784421857446432, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.00456230528652668, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004601760301738977, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004342064261436462, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003064004937186837, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.24.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06627025455236435, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.062086835503578186, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06006661430001259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.054561249911785126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.030599668622016907, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.028752662241458893, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.035200413316488266, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03266223520040512, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03101089783012867, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.027518561109900475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.026420755311846733, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.017768429592251778, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015541397966444492, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014638738706707954, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014419888146221638, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008884740062057972, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007498660124838352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007389725185930729, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0068704718723893166, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006767405662685633, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.00464539835229516, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004578606225550175, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004367445129901171, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002912786090746522, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.24.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.19586336612701416, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1841004341840744, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18043476343154907, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16431808471679688, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0912313163280487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08682134002447128, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10191937536001205, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09380221366882324, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09232943505048752, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0821775421500206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07890981435775757, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.051600512117147446, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04466748982667923, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04348885640501976, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04321248084306717, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.025729933753609657, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02197578176856041, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.021884972229599953, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020102817565202713, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020028114318847656, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013145514763891697, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012691938318312168, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.01266917772591114, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007584345992654562, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.24.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.19406482577323914, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.15129174292087555, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.13579529523849487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.10408278554677963, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08605349808931351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.06906473636627197, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.10503634065389633, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09660819917917252, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.09106844663619995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0599646233022213, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.0545559898018837, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.05384134501218796, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04654815047979355, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.041831158101558685, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.040651194751262665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.027073366567492485, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.022583624348044395, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.02222980558872223, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0178298931568861, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.017199601978063583, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.014892679639160633, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0156613327562809, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.013439018279314041, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.011697424575686455, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.24.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17679090797901154, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16716544330120087, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16426536440849304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1504632532596588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08266793191432953, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07916955649852753, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09123918414115906, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08443018794059753, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.0835801213979721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07507380098104477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07222061604261398, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.046417687088251114, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04034598544239998, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03955335170030594, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03936198353767395, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.023186558857560158, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020407529547810555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020359233021736145, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01888437569141388, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018856100738048553, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012228671461343765, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012386181391775608, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011969625018537045, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008370506577193737, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.24.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21896272897720337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20707319676876068, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.2034912109375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1864672154188156, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10227328538894653, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09797180444002151, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.1127835065126419, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10444963723421097, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10341440886259079, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0929006040096283, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0893007293343544, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05723106861114502, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04980015009641647, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.0488109290599823, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04857785999774933, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.028560880571603775, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02484922856092453, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.024785323068499565, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022922366857528687, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022889934480190277, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014821339398622513, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014560669660568237, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014499231241643429, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009171722456812859, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.24.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21041065454483032, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18882492184638977, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17955531179904938, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1613485962152481, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09538961201906204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08705350011587143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11260761320590973, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10384123772382736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09813691675662994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08306661248207092, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07901415228843689, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.0569005012512207, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04951074719429016, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04577197507023811, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.0448499396443367, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028505628928542137, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.023910287767648697, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023497600108385086, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021515140309929848, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020932301878929138, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015213924460113049, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.0154042299836874, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014045139774680138, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010609530843794346, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.25.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07301414757966995, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06847447901964188, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06664727628231049, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06065108999609947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.034018129110336304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03215685114264488, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03846533223986626, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03561139106750488, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.034469012171030045, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.030588101595640182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.0293483454734087, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.019523005932569504, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.01703169196844101, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.016328969970345497, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01616036146879196, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009770735166966915, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008489574305713177, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008421468548476696, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0078076147474348545, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007742974907159805, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005162149667739868, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005308043677359819, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004938553087413311, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0036603135522454977, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.25.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06900457292795181, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06466884911060333, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06256510317325592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05681286007165909, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0319182425737381, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.029959851875901222, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.0367177352309227, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.033986903727054596, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03235306590795517, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.028674079105257988, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.027542538940906525, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.018524395301938057, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.016225244849920273, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.015266990289092064, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01503837201744318, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.0092737702652812, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007820257917046547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007713838014751673, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007167312316596508, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0070586963556706905, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004836727865040302, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004790208302438259, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004550036508589983, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.003065724391490221, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.25.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20153607428073883, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18927033245563507, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18534699082374573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16867312788963318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09367264062166214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.0891314223408699, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10452064126729965, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09631223976612091, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09484226256608963, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08426296710968018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08076523244380951, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05290794000029564, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.045843735337257385, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04463915526866913, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.044349305331707, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02637752890586853, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.022535739466547966, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.022446665912866592, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020585643127560616, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020512016490101814, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013479461893439293, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012986733578145504, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012994864024221897, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007700867485255003, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.25.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.19437526166439056, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.14263306558132172, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.12306355684995651, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.09958863258361816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0843319222331047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.06271827965974808, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.1055450290441513, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09739428013563156, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.09102855622768402, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05641607567667961, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05384591594338417, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.053750645369291306, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.046495042741298676, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.04055419936776161, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.039058513939380646, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.026878105476498604, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.02119147777557373, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.020748373121023178, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.015827028080821037, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01481856033205986, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.01424474362283945, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.014091632328927517, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012339620850980282, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.009363471530377865, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.25.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18167702853679657, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.17174537479877472, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16875995695590973, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15463317930698395, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.0849430114030838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08134843409061432, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09392713010311127, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08677453547716141, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08589759469032288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0771428644657135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07423446327447891, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.047763556241989136, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04145745187997818, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04063335061073303, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04044085741043091, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.023870017379522324, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02093465067446232, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02088095434010029, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01936076208949089, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019332759082317352, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012630937620997429, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012650980614125729, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012363286688923836, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008487273938953876, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.25.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22268687188625336, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2106081247329712, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20698033273220062, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18959815800189972, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10405601561069489, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09968307614326477, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11479005962610245, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10630358010530472, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10522690415382385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0945269837975502, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09084548056125641, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05828263610601425, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.050666116178035736, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.0496680811047554, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04943033680319786, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.029054757207632065, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025276141241192818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02520804852247238, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023311080411076546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023282192647457123, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01506818551570177, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014800459146499634, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01473999209702015, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009306477382779121, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.25.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21615658700466156, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19416016340255737, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18489369750022888, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1661994904279709, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09796281158924103, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08958083391189575, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11538097262382507, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10632550716400146, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.10077735036611557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08540818095207214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.08126996457576752, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05838286131620407, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.050633955746889114, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04695027694106102, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.046048782765865326, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02930721826851368, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024426594376564026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.0240161269903183, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.0219695046544075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021389076486229897, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015730757266283035, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015576698817312717, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014561057090759277, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.01057199202477932, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.26.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07138661295175552, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06703991442918777, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06527766585350037, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.059471432119607925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03330736607313156, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.031512510031461716, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03775904327630997, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.034870315343141556, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.033728502690792084, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02999885007739067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.028848037123680115, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.01918468438088894, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.016685811802744865, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.015996240079402924, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.015825718641281128, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009621315635740757, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.0083225192502141, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008253798820078373, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007666627876460552, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007603089325129986, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005129358731210232, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0052146147936582565, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004915218334645033, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003607937367632985, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.26.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06766849011182785, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06347739696502686, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06136482208967209, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05580488592386246, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.031298018991947174, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.029404837638139725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.036032188683748245, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.033353064209222794, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03172266110777855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.028157157823443413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.027058018371462822, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.018221549689769745, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015912238508462906, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014990346506237984, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014759769663214684, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009113570675253868, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0077093555592000484, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007604341953992844, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007075173314660788, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0069700428284704685, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004789040423929691, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004752687178552151, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0045105586759746075, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0031023724004626274, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.26.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.19742614030838013, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18551157414913177, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18178614974021912, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16561272740364075, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09181006997823715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08737992495298386, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.1024133637547493, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09440317749977112, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09297100454568863, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08270473778247833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07929325848817825, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05177886039018631, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.044910136610269547, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04375103861093521, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04348565265536308, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02582070603966713, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02208155393600464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02199159935116768, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02018263377249241, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020117631182074547, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013195029459893703, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012691606767475605, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012748414650559425, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007505594752728939, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.26.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.18974663317203522, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.15522417426109314, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.14269278943538666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.11003392934799194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.08428832143545151, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.07078932970762253, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.10269062221050262, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.09389769285917282, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.08854168653488159, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.06290513277053833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05454019457101822, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.05250224843621254, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04526566341519356, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.040989719331264496, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03992350399494171, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.026634521782398224, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.02223651111125946, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.021909065544605255, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01835206151008606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01784726418554783, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.015107494778931141, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01548681315034628, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.013864231295883656, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.011736011132597923, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.26.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18474039435386658, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.17466001212596893, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.17163246870040894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1572696715593338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08637090772390366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08271180838346481, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09534452855587006, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08821973949670792, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.0873265340924263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07843354344367981, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07548145949840546, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04845171421766281, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04212385043501854, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.041286714375019073, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04109238088130951, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.024199843406677246, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02121392823755741, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02115970477461815, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019603554159402847, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019579313695430756, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012713299132883549, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.0127263143658638, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012445292435586452, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008428363129496574, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.26.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.2235259711742401, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.21138742566108704, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20775651931762695, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.19035089015960693, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10447794198989868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.10007549077272415, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11522889137268066, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10669296979904175, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10561180114746094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09487871080636978, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09122247993946075, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.058483101427555084, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.05085642635822296, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04985649511218071, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04961951822042465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.029170239344239235, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025370024144649506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025305068120360374, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023396942764520645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023369189351797104, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01513998955488205, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014845144934952259, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014813283458352089, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009325307793915272, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.26.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2138708084821701, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19181133806705475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18212415277957916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16383692622184753, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09686564654111862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08826031535863876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11515207588672638, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10589828342199326, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09974808245897293, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.0844983384013176, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.08046825975179672, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05813021957874298, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.05048306658864021, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.0465240553021431, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.045551106333732605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.029173947870731354, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024380387738347054, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023943254724144936, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02198205515742302, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02137286216020584, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015693804249167442, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015818746760487556, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014457546174526215, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010999205522239208, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.27.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.073013074696064, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06857231259346008, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.0667516216635704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.060803331434726715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03401607647538185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03217208757996559, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.038482848554849625, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03564196452498436, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03445477783679962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.030630161985754967, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.029390405863523483, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.01949957199394703, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017003867775201797, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.016286136582493782, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.016113506630063057, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009751416742801666, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008388749323785305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008314753882586956, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007705289404839277, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007636681664735079, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005104394163936377, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005135158076882362, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004876081831753254, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0033923713490366936, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.27.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06732349097728729, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06308801472187042, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06102234870195389, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05544648319482803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.031146280467510223, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02922382578253746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.0359034426510334, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03330724313855171, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03155214712023735, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.028017761185765266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.026892786845564842, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.018120164051651955, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015885842964053154, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014906282536685467, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014661857858300209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009066352620720863, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007622255478054285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007509548217058182, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006985379382967949, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006871037185192108, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004723893012851477, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004647531546652317, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004431287292391062, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002940469654276967, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.27.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20962387323379517, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19713328778743744, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1932905912399292, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.17623266577720642, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09795089811086655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09325379133224487, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.1093042865395546, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.1005689725279808, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09915921092033386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08824926614761353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08464939892292023, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.0553630106151104, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04793202504515648, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04672775790095329, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.046438660472631454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.027605000883340836, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0235983207821846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023502875119447708, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0215785913169384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02151469886302948, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014109664596617222, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013607277534902096, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013618238270282745, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008117538876831532, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.27.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.15266422927379608, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.12612685561180115, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11708665639162064, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.09587747603654861, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06811480224132538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.057991817593574524, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08137260377407074, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07465232908725739, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07147304713726044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05212656408548355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.04701652005314827, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.041536152362823486, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.035805657505989075, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03282482922077179, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03209621459245682, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.020875364542007446, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.017312800511717796, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01710687205195427, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.014243321493268013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.013877765275537968, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.011391901411116123, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.011409100145101547, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.010460146702826023, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.008069724775850773, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.27.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1751105785369873, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1655687838792801, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16265755891799927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14914295077323914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08186812698841095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07837431132793427, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09039858728647232, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.0836295336484909, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08278120309114456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07433230429887772, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07150658220052719, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.0459437258541584, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.0399317592382431, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03913351148366928, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03894694149494171, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022942688316106796, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020141681656241417, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02008839324116707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01862374320626259, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018598124384880066, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012070629745721817, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012135111726820469, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011813058517873287, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008102581836283207, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.27.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21435695886611938, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20266777276992798, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.19916099309921265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18255464732646942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10012071579694748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09587877243757248, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11038877815008163, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10225191712379456, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10122651606798172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09093404561281204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08743275701999664, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05603574588894844, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04874112457036972, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.047771841287612915, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04754435643553734, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02793845906853676, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024312041699886322, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.024247732013463974, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022422777488827705, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022390590980648994, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014484334737062454, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014234503731131554, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014167007058858871, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008944446220993996, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.27.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.19960352778434753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.17834943532943726, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.16893163323402405, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1517590433359146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.0903000682592392, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08191603422164917, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10765445232391357, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09915097057819366, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09313572198152542, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07841936498880386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07474718242883682, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.054420508444309235, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04726484417915344, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04333709180355072, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04237625375390053, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.027263594791293144, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02264493890106678, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.022207021713256836, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02032872661948204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.01971368119120598, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.01457911729812622, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.014649474062025547, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013342638500034809, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.01004634890705347, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.28.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06679371744394302, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06271722167730331, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.060962505638599396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05556434020400047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03114851377904415, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.02940579503774643, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0353873074054718, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03276913985610008, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03155532851815224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0280479583889246, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.02695504203438759, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.017944328486919403, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.015655267983675003, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.014931166544556618, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.014758751727640629, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.008977364748716354, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.0077160419896245, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007640669587999582, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007093869149684906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.00701897731050849, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004717577714473009, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0047703031450510025, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004494712688028812, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0031967712566256523, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.28.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06071702763438225, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05698813125491142, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05487039312720299, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.049926649779081345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.02811514027416706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.026309695094823837, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03278104588389397, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.030389107763767242, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.02850721962749958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.025342412292957306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02438821829855442, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.016554689034819603, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.014456260949373245, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.013465945608913898, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01322765089571476, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008284609764814377, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.006921176332980394, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.006802408955991268, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006357059348374605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006230007391422987, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004347995854914188, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004283149726688862, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004052072763442993, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002751404419541359, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.28.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20844729244709015, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19612666964530945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1923646628856659, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1754491627216339, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0975026935338974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09287086874246597, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10870452225208282, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09999579936265945, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09869968891143799, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08784619718790054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08426456153392792, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05504781752824783, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04764325171709061, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04649612680077553, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.046212904155254364, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02746887132525444, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.023461727425456047, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023380862548947334, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.021443882957100868, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.021382765844464302, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.01402300875633955, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013472966849803925, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013530531898140907, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007976383902132511, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.28.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1646372675895691, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.11168614029884338, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.08438725024461746, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.07972580194473267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0688927099108696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.04100053012371063, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09744396805763245, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08994719386100769, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.0775904506444931, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.049064360558986664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.049520451575517654, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.049767810851335526, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.043187495321035385, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03375290706753731, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.031148994341492653, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.02490469440817833, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01846245490014553, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.017607081681489944, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.014904003590345383, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.013176553882658482, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013461330905556679, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013947002589702606, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.010531681589782238, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010010434314608574, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.28.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17476102709770203, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16524828970432281, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1623944491147995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1489282101392746, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08165199309587479, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07820142805576324, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09015446156263351, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08340714871883392, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08255889266729355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07418198138475418, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07139788568019867, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.045790936797857285, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03981611132621765, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03902557119727135, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03884187713265419, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022862505167722702, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020050715655088425, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020000549033284187, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018542883917689323, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01851450651884079, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011994175612926483, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012037405744194984, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011736134998500347, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007973257452249527, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.28.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21238717436790466, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2008974552154541, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.19743485748767853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18102969229221344, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09920239448547363, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09502900391817093, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10937493294477463, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10130653530359268, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.1003081202507019, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0901506245136261, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08670784533023834, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05551630258560181, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04828890785574913, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.047343626618385315, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04712282866239548, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02768896147608757, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02410360984504223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02404026873409748, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022242726758122444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.0222113449126482, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014358396641910076, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014123857952654362, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01404779776930809, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008898559957742691, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.28.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.1987042874097824, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.1780664622783661, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1683375984430313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1512984186410904, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09007881581783295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.0816844031214714, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10859385877847672, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09954725205898285, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09284225106239319, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07851263135671616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07492136210203171, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05478300899267197, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04770437255501747, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.043497808277606964, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04244934767484665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.027584809809923172, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02317281998693943, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.022694548591971397, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02098408155143261, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020334748551249504, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.01498002652078867, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015585299581289291, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013641790486872196, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011323663406074047, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.29.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.06545644998550415, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06141247972846031, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.05960973724722862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05429310351610184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.030458975583314896, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.02872350998222828, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03465919569134712, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.032124076038599014, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.030861346051096916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.027404682710766792, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.026323871687054634, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.017569726333022118, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.015330888330936432, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.014591960236430168, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.014416688121855259, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.008798692375421524, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007531896233558655, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007453775964677334, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.006917379796504974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.006840778049081564, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.0046209548600018024, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004649115726351738, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0043950132094323635, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0030904379673302174, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.29.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06202352046966553, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05808670446276665, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05603151395916939, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.050956543534994125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.028688332065939903, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.026827320456504822, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03332402557134628, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.030889928340911865, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.029064081609249115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.025814641267061234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.024820921942591667, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01683676615357399, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.014733513817191124, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.013729888945817947, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.013484718278050423, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008413338102400303, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007032969035208225, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0069129024632275105, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006446370389312506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0063223401084542274, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004389528650790453, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004321228247135878, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0040930090472102165, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.00274363299831748, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.29.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.19759395718574524, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1855648010969162, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1817062795162201, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16566666960716248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09202910959720612, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08758702129125595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10285163670778275, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09467940032482147, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09319932758808136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08287683129310608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07952220737934113, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05206381529569626, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.045052818953990936, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.043893035501241684, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04359892010688782, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02593967504799366, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.022159438580274582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02206498198211193, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020229194313287735, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020168915390968323, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013259648345410824, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012740167789161205, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012785159051418304, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007547273300588131, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.29.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.16218137741088867, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1276882290840149, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11268384009599686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.09488360583782196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07140054553747177, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.057286232709884644, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09266123175621033, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0844230055809021, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07611769437789917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05384165421128273, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.05087605491280556, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04750349745154381, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.040900759398937225, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03507791832089424, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.033573005348443985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.02418407052755356, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01952771469950676, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01900295540690422, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.016451198607683182, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.015583815984427929, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.013836543075740337, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01444119494408369, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.012164836749434471, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.011219087988138199, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.29.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1745014190673828, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1650620847940445, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1622185856103897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14881813526153564, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08153854310512543, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07811926305294037, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08997919410467148, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.083269864320755, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.0824485793709755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07411576807498932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07133091986179352, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04572730138897896, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03975965082645416, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03897181525826454, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03879184275865555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022838065400719643, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020036760717630386, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.019985239952802658, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.01853257790207863, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018507985398173332, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011988155543804169, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012037748470902443, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011732365936040878, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007994227111339569, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.29.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.2110351026058197, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.19967740774154663, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1962898224592209, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18003980815410614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09857672452926636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09444869309663773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10870761424303055, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10066469758749008, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09965963661670685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08960146456956863, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08620753884315491, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05519840121269226, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.047990381717681885, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04705058038234711, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04683048650622368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.0275278277695179, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.023961937054991722, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.023902010172605515, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02212386019527912, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022091999650001526, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014295040629804134, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014056363143026829, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01398615911602974, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008877757005393505, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.29.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.1985020935535431, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.17802174389362335, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.16827034950256348, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15136057138442993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.08999631553888321, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08161404728889465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10862956196069717, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.0995926558971405, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.0927828848361969, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07857099175453186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07496146112680435, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05501515418291092, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.047672089189291, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.0434151254594326, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04235697537660599, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.027964547276496887, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02305033802986145, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02256876602768898, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02087118662893772, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020215047523379326, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015633761882781982, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.01541623380035162, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014364402741193771, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011090549640357494, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.30.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07611146569252014, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.0712701752781868, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06931106746196747, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0630335882306099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03547618165612221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.033443499356508255, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04025578498840332, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.037217892706394196, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.0359540656208992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03184480220079422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.030560621991753578, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02046435885131359, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017857465893030167, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.017053013667464256, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.016858991235494614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010273521766066551, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008911681361496449, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008831525221467018, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008194931782782078, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.008117866702377796, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005484472028911114, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005643037147819996, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005235057324171066, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003957777284085751, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.30.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06598283350467682, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.061722323298454285, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.059485435485839844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05408134311437607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03044762834906578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.028458379209041595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03535999730229378, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03271591290831566, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03086550533771515, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.027333581820130348, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02627750113606453, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.017830030992627144, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015591554343700409, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014553341083228588, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014309821650385857, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008916746824979782, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007474089041352272, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007357760798186064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0068431138060987, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006720718462020159, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004672241397202015, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004613805562257767, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004363573621958494, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002968881046399474, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.30.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.18995121121406555, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1780189871788025, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1741160750389099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.15832479298114777, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.08801770955324173, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08356890827417374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.09852573275566101, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09067823737859726, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.08921441435813904, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0789589136838913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07568502426147461, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.04982548579573631, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.043153390288352966, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04194425418972969, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.041657544672489166, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02484545111656189, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.021164491772651672, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.021066900342702866, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.01929406262934208, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.019213441759347916, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.012688684277236462, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012211508117616177, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012216766364872456, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007251039147377014, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.30.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1386619359254837, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1107093021273613, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.1011408269405365, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.07574926316738129, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.062129490077495575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05161122605204582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.07468964159488678, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0681886225938797, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.06457404792308807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.04200777783989906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.039324793964624405, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.038374099880456924, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03357483446598053, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.030916057527065277, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03027658350765705, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.019986826926469803, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.017868410795927048, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01766582764685154, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.014618766494095325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.014294253662228584, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.01210995763540268, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013454440981149673, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.011402400210499763, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.01130421832203865, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.30.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17718006670475006, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16755297780036926, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.164687380194664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1509554237127304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08283131569623947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0793268084526062, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09146740287542343, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.0846254751086235, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08376149833202362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07523190975189209, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0724186822772026, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.046464014798402786, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04038718342781067, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03958107903599739, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03939325362443924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02320890501141548, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02030288800597191, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020248178392648697, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018758006393909454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01872941106557846, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012166935950517654, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012132423929870129, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011905884370207787, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007962040603160858, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.30.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21593397855758667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2042631059885025, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20070786774158478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1840214878320694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10087741166353226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09660372883081436, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11131856590509415, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.1030513346195221, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.1019887775182724, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09162554144859314, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0881594717502594, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05649697408080101, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.049124930053949356, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04814780503511429, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04791390895843506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02819262258708477, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024510538205504417, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02444499172270298, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02260737307369709, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02257738821208477, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014650749042630196, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014362228102982044, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014333290047943592, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009044412523508072, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.30.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2020447701215744, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18285955488681793, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17399340867996216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.15671762824058533, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09193195402622223, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08420123904943466, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10907395929098129, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10050330311059952, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09443958848714828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08083213120698929, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07697303593158722, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05524994060397148, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04811213165521622, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.044286251068115234, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04334002360701561, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.027958881109952927, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02341344766318798, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02299337275326252, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02128385379910469, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020711122080683708, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015433772467076778, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015457654371857643, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014292759820818901, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011066950857639313, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.31.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07617472857236862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07164263725280762, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06982450187206268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06368597596883774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03553742915391922, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03365595266222954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04016662389039993, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.037132613360881805, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03598424792289734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03204469010233879, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.030812622979283333, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02034575119614601, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017715422436594963, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01700538955628872, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.016836971044540405, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010175036266446114, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008732952177524567, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.00866269413381815, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008022986352443695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.00795914139598608, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005315587390214205, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005297614727169275, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.0050934175960719585, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0034564523957669735, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.31.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06615781038999557, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06213793158531189, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.060249049216508865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05481758341193199, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.030616873875260353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02881227619946003, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.035306449979543686, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.032612692564725876, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03101501613855362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0276112649589777, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02658582292497158, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.017832059413194656, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015528392046689987, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014638463035225868, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014420305378735065, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008908944204449654, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0074698650278151035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007367661222815514, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006853114347904921, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0067513310350477695, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004628809168934822, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004518346861004829, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0043555498123168945, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002824497641995549, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.31.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.19719696044921875, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18558716773986816, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18193534016609192, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16600029170513153, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09179381281137466, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08742913603782654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10211522877216339, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09419339150190353, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09293026477098465, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08277719467878342, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07946469634771347, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.051673393696546555, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04487171396613121, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04375043138861656, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04349203780293465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.025755902752280235, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.022075334563851357, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.021994078531861305, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020189592614769936, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02013380266726017, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013158834539353848, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012671967968344688, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012730391696095467, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007501095999032259, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.31.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1554049849510193, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.12267161160707474, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.10525782406330109, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0855775773525238, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06764010339975357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.051397405564785004, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.09253347665071487, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08502008020877838, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07306550443172455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.052072636783123016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.048386845737695694, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.047410570085048676, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.04093485698103905, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03323625400662422, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03116440773010254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.023930663242936134, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.018376542255282402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.017586736008524895, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.015610776841640472, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.014403125271201134, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.01343994028866291, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.013816693797707558, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.011186109855771065, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010343463160097599, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.31.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18076913058757782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.17093917727470398, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16797654330730438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15401549637317657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.0845220685005188, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08093120157718658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09338019043207169, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08634740114212036, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08547244220972061, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07676451653242111, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07396630942821503, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.0474599152803421, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.041222307831048965, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04039676487445831, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04020066559314728, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02371409721672535, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02071746252477169, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020662473514676094, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019144952297210693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019115399569272995, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012464088387787342, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01238325610756874, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012200875207781792, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008127272129058838, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.31.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21921488642692566, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20732729136943817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20374922454357147, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18678535521030426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10243445634841919, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09808921068906784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11303041130304337, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10463833063840866, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10357394069433212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09305435419082642, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08953423798084259, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05738616734743118, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04989577457308769, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.048894234001636505, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.048660922795534134, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.0286251250654459, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024905722588300705, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02483779937028885, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022977007552981377, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022942299023270607, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014864739961922169, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014606958255171776, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01453928742557764, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009217323735356331, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.31.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.20856790244579315, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.1894538849592209, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18059375882148743, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16267508268356323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09495427459478378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08732011914253235, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11244332790374756, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10358322411775589, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09739342331886292, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.0837186798453331, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07974766194820404, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.056951191276311874, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04945365712046623, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04561489447951317, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04468098282814026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.028707167133688927, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.023916875943541527, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02347473055124283, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021733876317739487, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02114235796034336, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.01569446362555027, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015517470426857471, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014533493667840958, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010792319662868977, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.32.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07857582718133926, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07380092889070511, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.07190899550914764, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06555014848709106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03671274334192276, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03474491089582443, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04143695905804634, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03833794221282005, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.037186190485954285, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03304057568311691, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03172365576028824, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.021010760217905045, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.018308676779270172, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.017582809552550316, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01740904524922371, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010509428568184376, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.009057649411261082, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008987059816718102, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008314782753586769, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.00824821088463068, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005505049601197243, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005534806754440069, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005276455543935299, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0036682875361293554, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.32.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06671116501092911, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06254163384437561, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.060382209718227386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05492968112230301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03092316910624504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02898011915385723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.035929061472415924, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.033142633736133575, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03131252899765968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0278147105127573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02680053375661373, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.018140269443392754, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015780773013830185, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014787561260163784, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014548803679645061, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009068234823644161, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.00758598605170846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007474720478057861, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006959960795938969, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.0068412283435463905, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004745441488921642, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004667380359023809, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004440163727849722, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0029907398857176304, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.32.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.21777097880840302, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.2047576904296875, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.20061911642551422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.18289531767368317, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.10166653990745544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09678946435451508, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.11323464661836624, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.10440223664045334, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.10294733941555023, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.09148155897855759, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08775235712528229, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05736197158694267, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04971383884549141, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.048478756099939346, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.048185303807258606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.028600843623280525, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02447204291820526, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.024386178702116013, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02234673500061035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.022288121283054352, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014601793140172958, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.014071829617023468, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.014116499572992325, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008363217115402222, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.32.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07576971501111984, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.058197665959596634, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.052475206553936005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.04348483309149742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03291307017207146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.02620432898402214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.040154751390218735, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0371565967798233, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03547601029276848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.023640913888812065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.022443922236561775, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02045929804444313, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017763610929250717, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.015879202634096146, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01540650986135006, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010243473574519157, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008343162946403027, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008234966546297073, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.0065592145547270775, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0062758238054811954, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005476891063153744, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005527691915631294, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004883116111159325, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0038074178155511618, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.32.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1813936084508896, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.17155994474887848, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16857007145881653, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15457330644130707, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08481712639331818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08122130483388901, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09375458210706711, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.0866762101650238, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08577942103147507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07702159881591797, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07419748604297638, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.047613292932510376, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.041362978518009186, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04053162410855293, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04033179581165314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.023803504183888435, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020748943090438843, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02069341577589512, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019160185009241104, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019130010157823563, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012503167614340782, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012339930050075054, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012231992557644844, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008022008463740349, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.32.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22056762874126434, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2085692435503006, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20499664545059204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1879149228334427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10312291234731674, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09874697774648666, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11376038938760757, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10533736646175385, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10426675528287888, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09364058077335358, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09006490558385849, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05775909125804901, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.050237398594617844, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.049230530858039856, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04899059608578682, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02881702594459057, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025068499147892, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02499794401228428, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023116696625947952, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023083461448550224, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014955424703657627, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.014697514474391937, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01462523266673088, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009262163192033768, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.32.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21011357009410858, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19144207239151, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1825534552335739, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16462498903274536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09573448449373245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08817943185567856, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11341015249490738, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10452167689800262, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09809288382530212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08465440571308136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.0805947482585907, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05714019387960434, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.0498599149286747, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04597238078713417, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.045024674385786057, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02860342711210251, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02407548576593399, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023628031834959984, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02192557603120804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021328851580619812, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015219437889754772, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015590672381222248, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.01400541327893734, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010804298333823681, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.33.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07646681368350983, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07172643393278122, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06979377567768097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0635230541229248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.035690173506736755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03370898216962814, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0404057651758194, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.037411924451589584, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.036161962896585464, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03206890448927879, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.030787479132413864, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.020502744242548943, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017860079184174538, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01708938553929329, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01690848544239998, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010253146290779114, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008792572654783726, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008715146221220493, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008058612234890461, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007984552532434464, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005369268357753754, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0053682029247283936, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005130604840815067, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0035261621233075857, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.33.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06772197037935257, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.0634673610329628, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06125723943114281, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.0556696392595768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.031335171312093735, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.029348798096179962, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03637772426009178, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03360386937856674, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03176037222146988, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0281435064971447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02707289718091488, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.018362006172537804, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01600707322359085, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014986193738877773, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014738490805029869, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.00918523222208023, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0076759494841098785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007557411678135395, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.00702532846480608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006908561568707228, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004796653985977173, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0046998849138617516, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0044914898462593555, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002986195497214794, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.33.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20331279933452606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19070306420326233, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1866183876991272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16982345283031464, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09455438703298569, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08985168486833572, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10559947043657303, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0972873792052269, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09577252715826035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08490975201129913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08133693784475327, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05343571677803993, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04631292074918747, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.045080579817295074, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04477445036172867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.026616346091032028, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.022768869996070862, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02267894335091114, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020776253193616867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02069372683763504, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013608206063508987, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013119127601385117, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.01313696801662445, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007819544523954391, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.33.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.14497989416122437, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.11511131376028061, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.10630358010530472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.08223042637109756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0662560984492302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.053812041878700256, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.07524722814559937, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.06920844316482544, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.06816764920949936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.04605846479535103, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03941628709435463, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.03835753723978996, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03313883766531944, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03181350231170654, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03150104358792305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01923520676791668, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.016567515209317207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01650378666818142, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.012646760791540146, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.012562687508761883, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.010374734178185463, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.010386602021753788, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.009962229989469051, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.007203853223472834, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.33.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18396030366420746, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1739378422498703, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.17089614272117615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15666264295578003, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08610610663890839, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0824146568775177, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09509206563234329, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08799437433481216, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08706001192331314, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07815439254045486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07514000684022903, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04831026867032051, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04199298471212387, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04114103689789772, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04094136133790016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.024131398648023605, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.021064184606075287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02100706472992897, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019443735480308533, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01941349171102047, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012635455466806889, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01253243163228035, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012359455227851868, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008150842972099781, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.33.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22312740981578827, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.21095265448093414, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20731988549232483, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1900538206100464, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10441359132528305, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09995576739311218, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11516265571117401, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.1066831648349762, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10558321326971054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09479645639657974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09113378822803497, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05853135883808136, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.05089201778173447, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04986898973584175, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04962409660220146, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02920001931488514, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025419466197490692, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025346778333187103, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02343767322599888, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023405827581882477, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.015171820297837257, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01494251936674118, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014837874099612236, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009470301680266857, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.33.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.213258296251297, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19498614966869354, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18635259568691254, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16809716820716858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09729355573654175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08993539214134216, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11529716104269028, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10580819100141525, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09955047816038132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08620849996805191, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.0822138637304306, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.058155905455350876, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.05036928877234459, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04661308974027634, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04569411650300026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02925690822303295, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02419658936560154, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023758362978696823, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.022017832845449448, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02143687754869461, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.01577189564704895, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015369945205748081, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014588329941034317, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010330780409276485, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.34.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.0751066505908966, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07038918882608414, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06844493001699448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.062292490154504776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.035068947821855545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03306647762656212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.039778321981430054, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03685176745057106, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03553440421819687, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03149331733584404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.030192172154784203, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.020161084830760956, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017597222700715065, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.016802731901407242, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.016615837812423706, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010085170157253742, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008667293936014175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.00858442485332489, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007943958975374699, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007866134867072105, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005290370900183916, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005329933948814869, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005046952050179243, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003539705416187644, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.34.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.065457783639431, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.061255842447280884, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.058896467089653015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05343567579984665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.030257074162364006, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.028180863708257675, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03552006185054779, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03284051641821861, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03064955398440361, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.027154389768838882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02614656463265419, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01789388246834278, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01563837192952633, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014480856247246265, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014198769815266132, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008952805772423744, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007441757246851921, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007304260041564703, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006816671695560217, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006672721356153488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004689226392656565, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004623658489435911, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.00434603076428175, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0029628495685756207, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.34.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.21283839643001556, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19961878657341003, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.19550630450248718, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.17787767946720123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09915859252214432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09420282393693924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.11071011424064636, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.10189105570316315, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.10046833753585815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08906877785921097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08533231168985367, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05607043579220772, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04854857176542282, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.047275591641664505, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04696597903966904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.027950992807745934, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.023891093209385872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023808542639017105, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0217985138297081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.021723777055740356, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014298597350716591, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013800247572362423, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013784486800432205, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008249233476817608, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.34.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.15613120794296265, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.12453479319810867, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11100336164236069, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.08502373099327087, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06974233686923981, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05614151060581207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08787443488836288, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.08046911656856537, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.0731472298502922, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.049328431487083435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.04581579193472862, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04472612217068672, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03862430900335312, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03375859558582306, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03252708911895752, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.022624297067523003, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.018037380650639534, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.017551491037011147, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.014238550327718258, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.013481620699167252, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.012513342313468456, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.012447712011635303, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.01102512888610363, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.008916902355849743, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.34.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18451520800590515, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1744321882724762, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.17139340937137604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15710873901844025, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08641120791435242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08270739763975143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09550304710865021, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08830665051937103, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.0873766839504242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07842615991830826, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0754372626543045, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04860455170273781, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04217221587896347, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04132215678691864, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.041118547320365906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.024282565340399742, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02120603807270527, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.021150942891836166, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019582953304052353, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019553925842046738, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012791591696441174, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012693814001977444, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012516164220869541, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008358066901564598, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.34.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22553810477256775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2131887525320053, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20948457717895508, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1920178383588791, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10554835200309753, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.10103444755077362, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11653123795986176, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10785121470689774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10673342645168304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0957854688167572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09209123998880386, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05921554937958717, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.051450226455926895, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.05040712282061577, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.05016116052865982, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.029551619663834572, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025685064494609833, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025615058839321136, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023674966767430305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023641806095838547, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01539820246398449, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.015084986574947834, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.015057558193802834, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009542582556605339, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.34.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21202610433101654, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19418732821941376, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1856183260679245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16763269901275635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09679725021123886, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08955705165863037, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11407100409269333, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10530640184879303, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.0990467220544815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08599109202623367, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.08195149153470993, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.057703472673892975, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.05010262131690979, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04633636027574539, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.045415762811899185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02891153283417225, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.023961735889315605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023520540446043015, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021819567307829857, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021237842738628387, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015415539965033531, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.01510351151227951, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014238372445106506, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.009984932839870453, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.35.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.0671931579709053, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06287097930908203, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.060857485979795456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05533041059970856, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0313248448073864, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.029398955404758453, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03608018532395363, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03330843523144722, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03175796940922737, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.028092892840504646, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.026963381096720695, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.018278103321790695, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.015941131860017776, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.015069711953401566, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.014852896332740784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.009216856211423874, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.007875993847846985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007781590335071087, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007236258126795292, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.0071399398148059845, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004969828296452761, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005014880560338497, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004707406274974346, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0034933032002300024, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.35.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.05919516459107399, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.05532712861895561, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.05297018215060234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.04808143526315689, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.027293162420392036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02534371428191662, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03227081522345543, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.02983679249882698, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.027693118900060654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.024460652843117714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.023536231368780136, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.016272764652967453, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01418288704007864, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.013067839667201042, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.012785573489964008, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.008136137388646603, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.006713539361953735, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0065825581550598145, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006140607409179211, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006001363042742014, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004270574077963829, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004191996064037085, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0039329747669398785, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002694866620004177, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.35.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.19767789542675018, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18520185351371765, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1811176985502243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16455034911632538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09196877479553223, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08727160841226578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10293135792016983, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09481395035982132, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09319061040878296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08242884278297424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07887070626020432, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05211475118994713, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.045147307217121124, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04387423023581505, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04357828199863434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.0259855929762125, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.022175336256623268, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.022075073793530464, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020190050825476646, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020111124962568283, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0132954316213727, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012834860011935234, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.01279466599225998, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007694627158343792, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.35.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.15343394875526428, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.11761455237865448, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.10566025227308273, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.07755358517169952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06807311624288559, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.054375410079956055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08344505727291107, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07490956038236618, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07162106037139893, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.04374304041266441, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.042721934616565704, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04273174703121185, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03719618543982506, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03422107547521591, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03348316624760628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.023062927648425102, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.020026270300149918, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.019826902076601982, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.016177034005522728, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.015827475115656853, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.01494417805224657, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.015294872224330902, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.014229938387870789, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.01299564354121685, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.35.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18568147718906403, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.17550382018089294, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1724216341972351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1580151617527008, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08700191229581833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08325818181037903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09629181027412415, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08891361951828003, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08798214793205261, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07891903072595596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07600252330303192, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.049017779529094696, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.042490482330322266, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.0416286401450634, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04142596200108528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.024548280984163284, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02141461707651615, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02135639823973179, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019779181107878685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019750187173485756, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.013066737912595272, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01289431843906641, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012792183086276054, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008586736395955086, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.35.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22794441878795624, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.21542644500732422, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.21165472269058228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.19390258193016052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10671231150627136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.10213072597980499, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11784704029560089, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10902980715036392, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10790730267763138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09681769460439682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09310666471719742, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05987817049026489, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.05203286558389664, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.05097595229744911, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.05072341486811638, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02988571859896183, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.026006823405623436, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025935273617506027, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023968352004885674, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02393188141286373, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.0155918775126338, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.015319516882300377, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.015252315439283848, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009756173938512802, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.35.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2189413160085678, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.20077744126319885, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1920350342988968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1734212040901184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.10002277046442032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.09264617413282394, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11781392991542816, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10872095823287964, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.10229237377643585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08896373212337494, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.08478321880102158, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05959306284785271, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.05181214585900307, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.047918640077114105, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04697369784116745, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.029959797859191895, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.0248744897544384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.024413907900452614, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02269156463444233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.022088121622800827, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.01615854911506176, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015794796869158745, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014958729967474937, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010591717436909676, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.36.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07948446273803711, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07424198091030121, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.07211542874574661, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06542795896530151, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03707895800471306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03489239513874054, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0421147346496582, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03897050395607948, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03760705888271332, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0331517793238163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03175828978419304, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.021378731355071068, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.018644168972969055, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.017796751111745834, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.017595598474144936, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010698557831346989, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.009244294837117195, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.009158707223832607, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.00846030842512846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.00837685912847519, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005655685905367136, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005778387188911438, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005393922328948975, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003955348394811153, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.36.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.0667925626039505, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.062376514077186584, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.0599316768348217, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05428202450275421, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.030898893252015114, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.028721099719405174, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03625479340553284, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.033512432128190994, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03133976086974144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02766149677336216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02657313644886017, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.018295710906386375, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015978917479515076, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014799707569181919, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.014508655294775963, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009159054607152939, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007616827264428139, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0074836015701293945, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006962756160646677, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006823559291660786, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.0048183249309659, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0047556087374687195, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004459511488676071, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.003091312013566494, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.36.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.2072657197713852, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1938394457101822, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1895495355129242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.17188581824302673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09614983201026917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09116057306528091, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10749467462301254, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09906307607889175, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.0974930077791214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08605498820543289, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08227990567684174, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.054360631853342056, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.047156039625406265, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04581378772854805, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04551273211836815, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.027092531323432922, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02315639704465866, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023054057732224464, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.021050238981842995, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020975099876523018, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013859739527106285, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013382918201386929, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.01334524154663086, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007998291403055191, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.36.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.10959368944168091, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.09092492610216141, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.08534160256385803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06410511583089828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.04949404299259186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0424131378531456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.056714266538619995, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.05220029130578041, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.0511622317135334, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03605152294039726, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03135937824845314, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02890692837536335, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.02498600259423256, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.02376731112599373, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.023478882387280464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.014492516405880451, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.012428931891918182, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.012367679737508297, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.009834385477006435, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.009774204343557358, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.007828815840184689, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.007893603295087814, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.007443827576935291, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.005541949067264795, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.36.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18317265808582306, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.17310914397239685, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1700257807970047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15575389564037323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.0858858972787857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.082154281437397, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.0949997752904892, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08779743313789368, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08686055988073349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07787065207958221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07493597269058228, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04832863807678223, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.041952818632125854, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.041094597429037094, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04088569059967995, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.024175221100449562, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.021129004657268524, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.021071305498480797, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.0195053331553936, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019473107531666756, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012777328491210938, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012710342183709145, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012506145983934402, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008447500877082348, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.36.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22688159346580505, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.2143971025943756, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.2106337696313858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.192904531955719, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10626355558633804, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.10167796909809113, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11730709671974182, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10863510519266129, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.1074855849146843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09638457745313644, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09260997176170349, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05960803106427193, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.05181761831045151, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.050757721066474915, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.05050711706280708, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.029727395623922348, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.0258516613394022, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02577788569033146, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.0238096434623003, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02377224713563919, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.015444008633494377, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.015164929442107677, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.015098320320248604, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009557871147990227, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.36.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21515126526355743, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19728650152683258, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18837235867977142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1702604591846466, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09825015813112259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.09088261425495148, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.1163206472992897, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.1073363795876503, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.1005183607339859, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08741188794374466, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.08341046422719955, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05863435938954353, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.051042940467596054, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.047044094651937485, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.0460621677339077, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.029279261827468872, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024346671998500824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023870421573519707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.022197065874934196, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021577076986432076, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015356614254415035, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015398050658404827, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.01406441442668438, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010183985345065594, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.37.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.08070781826972961, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07564426213502884, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.07353638112545013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06688579171895981, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0376763716340065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03554071858525276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.042792342603206635, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.039571139961481094, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03818298503756523, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03381488844752312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03246486186981201, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.021685469895601273, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.01889694668352604, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.018046941608190536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.017844852060079575, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010851491242647171, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.009295884519815445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.009207937866449356, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008513265289366245, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.008429649285972118, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005690823774784803, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005693479441106319, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005429626442492008, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0037540674675256014, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.37.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.07013453543186188, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.0656522810459137, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06322427093982697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05742112919688225, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03247173875570297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.03030659817159176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03792940080165863, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03506547212600708, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.032892726361751556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.029151741415262222, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.028021017089486122, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.019113946706056595, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.016687825322151184, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.015521414577960968, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.015236256644129753, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009554878808557987, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.007949361577630043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007811457384377718, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007272821385413408, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.007133840583264828, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.00498452503234148, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004887540824711323, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004634579177945852, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.003089808626100421, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.37.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.21540425717830658, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.20197419822216034, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.19774919748306274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.17988455295562744, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.10024137794971466, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09527922421693802, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.11177753657102585, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.10299016535282135, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.10153920203447342, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08990824222564697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08617200702428818, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05663038417696953, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.049048393964767456, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04778385907411575, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04750286042690277, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.028243469074368477, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.024192476645112038, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.024094371125102043, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02206128090620041, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.021978821605443954, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014459029771387577, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.0139948520809412, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013953957706689835, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008450486697256565, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.37.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1597200185060501, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.12780135869979858, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11865614354610443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.08898954093456268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07226431369781494, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05949005112051964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08394410461187363, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07607343047857285, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07489190250635147, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.04949163645505905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.043533891439437866, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.042679645121097565, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.036572910845279694, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03486720845103264, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03445766493678093, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.021648598834872246, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01847946085035801, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.018408263102173805, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.014194733463227749, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.014082434587180614, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.012198282405734062, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01204025000333786, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.011703502386808395, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.00880958791822195, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.37.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18115153908729553, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.17116668820381165, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1681140661239624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15393705666065216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08492373675107956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08124767988920212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09387262910604477, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.0868254005908966, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08590168505907059, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07698889821767807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07399392127990723, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04777677357196808, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.041490789502859116, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04063340276479721, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04043225571513176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02387607842683792, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020885637030005455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020826401188969612, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019276905804872513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.019244812428951263, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012574110180139542, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012547709979116917, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012303264811635017, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008319028653204441, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.37.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22669342160224915, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.21416683495044708, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.21040058135986328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.19261610507965088, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10619994252920151, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.10159672796726227, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11730895191431046, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10855325311422348, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10740242898464203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09626823663711548, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09246423840522766, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05958572402596474, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.051788680255413055, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.0507240928709507, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.050473544746637344, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.029749076813459396, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025850357487797737, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025779834017157555, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023808680474758148, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023773008957505226, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.015492445789277554, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.015194190666079521, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.015137875452637672, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009622608311474323, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.37.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21087758243083954, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19339503347873688, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.18432104587554932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16655223071575165, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09639358520507812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08897589147090912, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11466104537248611, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.1058264747262001, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09864357113838196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08580785989761353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.08183412253856659, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05784926563501358, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.05049232766032219, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.046324003487825394, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04529010131955147, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.029117237776517868, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024247081950306892, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023757945746183395, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.022184962406754494, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021548468619585037, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015680700540542603, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.01573898084461689, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014399657025933266, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010876714251935482, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.38.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.08350080996751785, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07839014381170273, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.07634971290826797, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06950142979621887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03904784843325615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0369299054145813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.044047750532627106, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.040782492607831955, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03955422714352608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03508608043193817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03361070901155472, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.022321561351418495, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.01946008764207363, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.018685370683670044, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.01849941909313202, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01115492358803749, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.009594392962753773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.009518200531601906, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.00878759752959013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.008718090131878853, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005822864826768637, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005818701349198818, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005577240604907274, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003790192771703005, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.38.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.07116824388504028, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06665638834238052, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.0643698126077652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.058429136872291565, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03295726329088211, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.03087448887526989, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03823158144950867, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03528792783617973, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03340426832437515, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02959449589252472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.028400994837284088, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.019307835027575493, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.016815761104226112, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.01576562412083149, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.015504658222198486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.00965371634811163, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.008076244033873081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007956271059811115, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007393450941890478, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.00726822717115283, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.005050416104495525, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004947710316628218, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004720844328403473, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0031477361917495728, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.38.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20179583132266998, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18927346169948578, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18531130254268646, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16849654912948608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09392890334129333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08927623182535172, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10462051630020142, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.0965467318892479, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09512262791395187, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08434712141752243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08066695928573608, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.052908819168806076, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04596121236681938, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04477560520172119, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04447551816701889, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.026379244402050972, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02261548489332199, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.022529251873493195, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020620467141270638, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02055283822119236, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013487226329743862, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013031531125307083, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013041980564594269, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007766719441860914, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.38.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.15861418843269348, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.12441924959421158, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11307694017887115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.08575067669153214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07060995697975159, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05652822181582451, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.08424234390258789, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07738788425922394, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07438889145851135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.048649679869413376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.043771784752607346, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.0428505502641201, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.036963947117328644, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.033903151750564575, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03314799442887306, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.021515514701604843, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01764732040464878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.017455359920859337, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01337711326777935, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.013002409599721432, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.011619563214480877, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.011318993754684925, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.01066569797694683, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.007635911460965872, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.38.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.18111968040466309, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.17104724049568176, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.167986199259758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15387386083602905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08495423197746277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08124148100614548, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.093922920525074, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08684994280338287, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08593633025884628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0769730806350708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.073956198990345, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04774392768740654, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04150892049074173, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04064813628792763, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04044109582901001, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02386324107646942, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02090456895530224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02084720879793167, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019284162670373917, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.0192541666328907, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012549243867397308, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012577345594763756, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012274065054953098, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00835409015417099, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.38.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22565105557441711, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.21309444308280945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20933006703853607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.19165396690368652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10573278367519379, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.10113263130187988, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11674125492572784, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10808609426021576, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10696426033973694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09580815583467484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09199757874011993, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.059318091720342636, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.051575202494859695, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.05051285773515701, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.05026014521718025, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02959192357957363, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02573687583208084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025669194757938385, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023696886375546455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023658649995923042, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.015371346846222878, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.015122770331799984, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.015023650601506233, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009564557112753391, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.38.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2173861414194107, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19914238154888153, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1900564730167389, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.17182068526744843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09916878491640091, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.09167192131280899, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11735007166862488, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10850361734628677, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.10158073902130127, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.0882590115070343, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.08424210548400879, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05933099985122681, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.05155974254012108, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04743866249918938, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.0464228056371212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.029598910361528397, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024436362087726593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.02394767664372921, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.022260142490267754, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02161562629044056, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015485868789255619, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015322849154472351, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.01415241789072752, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.009921025484800339, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.39.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.08862590044736862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.08307874947786331, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.08086627721786499, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.07353273034095764, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.04143601283431053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.039126381278038025, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04681449383497238, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.043355587869882584, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.04199030250310898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03717013821005821, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.035604316741228104, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.023731261491775513, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.020723361521959305, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01986522041261196, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.019656920805573463, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.011881235055625439, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01026993989944458, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01018567569553852, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.009412349201738834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.009333617985248566, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.006247598212212324, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.006337470840662718, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005980461835861206, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.004266439937055111, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.39.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.07488304376602173, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.07010278850793839, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06777455657720566, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.06160541623830795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.034806907176971436, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.03259316086769104, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.04014267027378082, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.037111517041921616, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03524107113480568, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0311824269592762, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.029952416196465492, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.02028503268957138, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.017690692096948624, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.016654524952173233, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.016398988664150238, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.010152728296816349, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.008542563766241074, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.008423982188105583, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.00781263317912817, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.007690526079386473, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.005303630605340004, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.005239632911980152, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004982242826372385, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.003362518036738038, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.39.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20637719333171844, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19354459643363953, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18946048617362976, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.17216433584690094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09609468281269073, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09129354357719421, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10721749812364578, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09888705611228943, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09735040366649628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08623696863651276, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.0824771299958229, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05424722284078598, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04708021506667137, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04581717029213905, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04552232846617699, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.027050508186221123, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.023166757076978683, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023067083209753036, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.021117370575666428, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.021034900099039078, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013838482089340687, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013386359438300133, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.01336626522243023, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008014986291527748, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.39.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1044844388961792, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07862250506877899, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.0705736055970192, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.059788983315229416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.047046832740306854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.0360916443169117, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.05523288995027542, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.050459787249565125, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.04894858971238136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03211575001478195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.029394173994660378, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.028101839125156403, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.024231091141700745, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.02271384187042713, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.022344298660755157, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.0142141655087471, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.012106540612876415, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01201540045440197, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.009333756752312183, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.009134609252214432, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.007925792597234249, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.008040354587137699, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.007469857111573219, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.005925492849200964, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.39.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17958475649356842, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16958977282047272, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16656319797039032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1524907946586609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08424597978591919, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08054600656032562, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09310755878686905, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08611825108528137, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.0852259173989296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07631637156009674, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07334738224744797, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.047368746250867844, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04116631671786308, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04031757265329361, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.040116313844919205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.023675350472331047, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02074141427874565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02068709395825863, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019133485853672028, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.0191036444157362, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012463758699595928, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012489384971559048, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012189190834760666, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008312658406794071, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.39.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.22332850098609924, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.21087518334388733, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.2071436643600464, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1896715611219406, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10469266772270203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.10010761767625809, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11603063344955444, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10700016468763351, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10589510947465897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09482602775096893, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.09128072112798691, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05901819095015526, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.05112046003341675, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.050072092562913895, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.049821268767118454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.029565423727035522, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.025643838569521904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.025572272017598152, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023629162460565567, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.02359412983059883, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.015650399029254913, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.015262557193636894, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.015317798592150211, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009929352439939976, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.39.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.21364010870456696, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.19509664177894592, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.185806006193161, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16796723008155823, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09730789810419083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08968091756105423, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11596381664276123, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10700909793376923, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09989193081855774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08651228994131088, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.08259352296590805, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.058615703135728836, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.05091647803783417, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.046634119004011154, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04558224976062775, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02931116707623005, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.024185236543416977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023688973858952522, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.022052695974707603, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.021389320492744446, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015503068454563618, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015418731607496738, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014127030037343502, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010259361937642097, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.40.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.08495642244815826, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07972422987222672, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.0776325985789299, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0706731528043747, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.039721958339214325, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.037539176642894745, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.044798970222473145, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.04152143746614456, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.040234774351119995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.035684190690517426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03416820243000984, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.022731788456439972, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.019832726567983627, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.019018739461898804, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.018827026709914207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.011366507038474083, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.009788860566914082, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.009708894416689873, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008970608003437519, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.008895321749150753, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005955450236797333, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0059737698175013065, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.00570335891097784, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003935644868761301, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.40.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.07268918305635452, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06811050325632095, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06583713740110397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05980438366532326, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03372900187969208, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.03160572797060013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.039051443338394165, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03603503853082657, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03418140858411789, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.030297407880425453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02911592274904251, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.019744304940104485, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01719406247138977, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.016123972833156586, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.015868978574872017, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009864945895969868, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.008268933743238449, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.008147931657731533, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007567796390503645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.007444468326866627, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.005168188828974962, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.005059804301708937, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004846197087317705, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0032178019173443317, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.40.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.2109297662973404, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19807223975658417, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.19410042464733124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.17660914361476898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09837253391742706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09363338351249695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10943853110074997, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.10091039538383484, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09961538761854172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08843889087438583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08459517359733582, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.055418021976947784, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.0480702705681324, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.046906646341085434, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04663579910993576, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02762100100517273, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.023682331666350365, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.023595910519361496, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02160775288939476, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.021561134606599808, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.014117433689534664, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013615320436656475, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013664370402693748, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.008079576306045055, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.40.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.14914539456367493, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.11268778890371323, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.10069753974676132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.08548948168754578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06518589705228806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05038117989897728, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.07946839183568954, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07273529469966888, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07005267590284348, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0476183257997036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.042931895703077316, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04059596359729767, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03500308096408844, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03163173422217369, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.030781161040067673, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.020588912069797516, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.017118889838457108, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.016945920884609222, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.013921767473220825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01345613319426775, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.011622915044426918, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.011874768882989883, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.010620424523949623, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.008933084085583687, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.40.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17956945300102234, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1695815473794937, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16654834151268005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1524224728345871, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08428001403808594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08059470355510712, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09316089004278183, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08615783601999283, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08523914963006973, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07633204013109207, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07338304072618484, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04741865396499634, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04119179770350456, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.040345270186662674, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.040147628635168076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02371443435549736, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020776817575097084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020718203857541084, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.019170716404914856, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01914099231362343, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01252799853682518, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012544755823910236, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.012259932234883308, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008391437120735645, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.40.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.2186710238456726, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20650428533554077, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.20284982025623322, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.18566159904003143, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10253582894802094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09805899113416672, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11328652501106262, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10480678081512451, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.10371758788824081, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09287432581186295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08920474350452423, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.057647254317998886, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.05003441870212555, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.049006711691617966, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04876556247472763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02877800166606903, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.02503081038594246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.0249624103307724, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.023049037903547287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.023010240867733955, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.015057909302413464, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01479632593691349, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.014721604995429516, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009480915032327175, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.40.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2061455249786377, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.18811927735805511, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17875336110591888, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16157355904579163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09404370933771133, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08640611916780472, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11232022196054459, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10388018190860748, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09654000401496887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08346644788980484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07976695895195007, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.056945253163576126, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.0494956411421299, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04514045640826225, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04406971111893654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02847251668572426, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.023525677621364594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.023019865155220032, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021448291838169098, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.020774660632014275, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015096929855644703, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.01518204528838396, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.01370596420019865, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010276294313371181, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.41.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.09194241464138031, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.08628816902637482, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.08413922041654587, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.07656314969062805, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.04298030585050583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.04068512097001076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04830000549554825, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.04478096216917038, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.043533459305763245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.038621190935373306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03698208928108215, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.024510880932211876, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.021390847861766815, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.02058815397322178, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.0203959122300148, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01225473266094923, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.010626362636685371, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01055101677775383, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.009749412536621094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.009681697934865952, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.006433672271668911, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.006517502944916487, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.006181969773024321, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.004366541747003794, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.41.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.07420524209737778, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06960605829954147, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06743353605270386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.061289384961128235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03443944454193115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.03236521780490875, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03971240296959877, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.036613013595342636, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03490261361002922, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.03094553016126156, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02972838282585144, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.020033564418554306, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.017434608191251755, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.01646944135427475, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.016221147030591965, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.010022113099694252, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.008410177193582058, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.008300180546939373, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0076979282312095165, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.007590809371322393, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.005216439254581928, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.005090581253170967, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004911379422992468, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.00321059743873775, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.41.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.2088841199874878, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.19623038172721863, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1924743354320526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1752292960882187, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09744974970817566, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.09273137897253036, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10827004909515381, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09983421862125397, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09859670698642731, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08760510385036469, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08392048627138138, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.054839614778757095, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.047545257955789566, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.046422429382801056, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.046152640134096146, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02732016146183014, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.023421380668878555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02334793284535408, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.021384302526712418, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02133670262992382, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013948680832982063, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013438107445836067, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013520126231014729, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007953597232699394, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.41.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.16548433899879456, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.11891071498394012, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.10417625308036804, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.08194997906684875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.07231395691633224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05604737997055054, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0863422304391861, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07917552441358566, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07683120667934418, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0454358272254467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.0433686226606369, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04385334253311157, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.038031455129384995, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.034984346479177475, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03424378111958504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.02213987335562706, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01886555925011635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.018725313246250153, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01405876874923706, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.013578074052929878, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.012286309152841568, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01289504673331976, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.011371063068509102, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.009686747565865517, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.41.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.176618292927742, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16674447059631348, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1637626439332962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1499413698911667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08289562165737152, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07927490025758743, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09166575968265533, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08472998440265656, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08384054899215698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0751001313328743, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07220115512609482, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04667260870337486, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04053184762597084, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03970275819301605, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03951110318303108, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02333862893283367, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020490696653723717, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020436007529497147, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018916890025138855, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018891455605626106, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012352608144283295, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012436442077159882, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01208340935409069, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008404170162975788, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.41.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.21381330490112305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.20190605521202087, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.19833782315254211, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.1815837025642395, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.10026893019676208, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0958855152130127, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.11069905012845993, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.10245463997125626, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.1014031395316124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.09082543104887009, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08725500106811523, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05630633234977722, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04891651123762131, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04791863262653351, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04768308997154236, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.028094176203012466, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.024468017742037773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02440592646598816, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.022534510120749474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.022501010447740555, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014661779627203941, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01445432472974062, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01433621160686016, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.009255990386009216, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.41.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.2061954140663147, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.1869940459728241, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.17722705006599426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.16004498302936554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.09371016919612885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.0856640487909317, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.11269815266132355, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.10399378091096878, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09647388011217117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.08290150761604309, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07924195379018784, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.057150375097990036, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04957563430070877, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04500450938940048, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04387382045388222, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02884877659380436, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.023511260747909546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.022981630638241768, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.021399740129709244, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.02068665251135826, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015747498720884323, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015288813039660454, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.014345387928187847, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010412582196295261, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.42.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.08745349943637848, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.08203644305467606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.07996705174446106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.07268132269382477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.04086151346564293, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03866272419691086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04592829942703247, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.04258956015110016, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.04138670116662979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.036677949130535126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.035086363554000854, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02328687533736229, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.020323526114225388, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.019556552171707153, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.019374854862689972, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01164131611585617, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.010050438344478607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.009976851753890514, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.009201335720717907, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.009133423678576946, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.006092393770813942, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.006100444123148918, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005850736517459154, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.003995678853243589, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.42.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.0730196088552475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06842374801635742, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06632211804389954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.06019444018602371, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0338943786919117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.03187843784689903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03893454000353813, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03591785207390785, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.034335728734731674, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.030423596501350403, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02919922210276127, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.019664496183395386, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.017120713368058205, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.016217954456806183, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01599067635834217, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.00983694102615118, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.008288110606372356, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.008187035098671913, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007574994117021561, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.007475240156054497, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.005114982835948467, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.005015377886593342, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004829089157283306, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.003169048810377717, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.42.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20221766829490662, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18982602655887604, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18607588112354279, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16925741732120514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09442386031150818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08984769135713577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10494303703308105, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09685461223125458, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.0955670028924942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08480896800756454, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08113148808479309, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05308928340673447, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04612002894282341, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04503730311989784, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.044775065034627914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02646205760538578, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.022723855450749397, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02264539524912834, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.02072010561823845, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.02066786400973797, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013525574468076229, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013049477711319923, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.013115627691149712, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007760029286146164, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.42.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.10287445038557053, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07628439366817474, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06855796277523041, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.056243278086185455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.045517731457948685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03595631942152977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.05330899357795715, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.04877877235412598, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.047968603670597076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03122803010046482, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.027983812615275383, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02716260589659214, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.023504113778471947, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.022094614803791046, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.021749291568994522, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01380880456417799, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.012008067220449448, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.011957676149904728, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.00943697988986969, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.009263833053410053, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.007873252034187317, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.008220643736422062, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.007468631491065025, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.006336793303489685, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.42.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17518073320388794, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16543099284172058, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.16249023377895355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14878767728805542, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08221176266670227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07863582670688629, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09076488763093948, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08404037356376648, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08316449820995331, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07447223365306854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07154372334480286, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.046152569353580475, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.040163472294807434, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03934384882450104, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03914997726678848, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.023060612380504608, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020233990624547005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020179906859993935, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018664119765162468, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018637022003531456, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012108604423701763, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.012175137177109718, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011841695755720139, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008093338459730148, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.42.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.2081930935382843, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1965770721435547, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1930943727493286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.17675362527370453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09756825119256973, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09331323206424713, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10771304368972778, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09970051050186157, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.0987028256058693, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08839427679777145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08489461988210678, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.054760079830884933, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04759225994348526, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04662860184907913, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04639485850930214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02732083573937416, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.023775797337293625, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.023712005466222763, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.021885106340050697, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.021854596212506294, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.014226047322154045, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.013992438092827797, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.013907749205827713, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00888560339808464, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.42.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.19525861740112305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.17586953938007355, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.16569645702838898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.14958277344703674, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.08855992555618286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.08030463010072708, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10806863009929657, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09943417459726334, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.09150770306587219, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07801267504692078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07461921125650406, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.054574813693761826, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04749484360218048, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04269146919250488, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04149376600980759, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.027548085898160934, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02254284918308258, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.0219911877065897, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.020509498193860054, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.019762739539146423, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015024444088339806, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015029747039079666, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.01355167105793953, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010562978684902191, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.43.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.08225642889738083, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.0771189033985138, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.07496098428964615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06814689934253693, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03837902843952179, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03620598465204239, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04341258108615875, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.0403282530605793, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.038882218301296234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.034442272037267685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.032960519194602966, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02200169861316681, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.019253186881542206, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01838454231619835, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.018176427111029625, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01100145373493433, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.009475469589233398, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.009384400211274624, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008678551763296127, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.008593767881393433, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005765815265476704, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.005816639866679907, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005502680316567421, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0038525527343153954, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.43.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.07074170559644699, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06623174250125885, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06385691463947296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05794980749487877, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03274303302168846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.03060929849743843, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.038084086030721664, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.035303667187690735, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03318716585636139, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.029399406164884567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.028220118954777718, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.019215716049075127, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01682133600115776, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.015675166621804237, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.015391284599900246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009614981710910797, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.008048202842473984, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007910658605396748, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007370342500507832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.007229696959257126, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.005047054495662451, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004971741233021021, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004701617639511824, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0031816037371754646, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.43.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.198004350066185, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18585552275180817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18203160166740417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16545288264751434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.0920698493719101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08754972368478775, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10262215882539749, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09447994083166122, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09318768233060837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0826367437839508, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07909148931503296, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05193942412734032, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04497022181749344, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.043862245976924896, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04358846694231033, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.025893960148096085, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0221476461738348, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.022059854120016098, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020197931677103043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020139027386903763, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013230080716311932, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.01273314654827118, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012778977863490582, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.007569716311991215, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.43.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.14600303769111633, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.1243683323264122, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11793199926614761, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.09804525226354599, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.06604459136724472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.05774034187197685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.07627347856760025, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.06991402804851532, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.06833741068840027, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.05211992934346199, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.04654945433139801, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.03890659660100937, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03356955945491791, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03182857856154442, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.03141732141375542, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.019572366029024124, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01685284450650215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.016754627227783203, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.014192236587405205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.014036077074706554, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.010736878961324692, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01100130844861269, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.010211890563368797, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.008005045354366302, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.43.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17286354303359985, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1632484793663025, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1603802591562271, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14674130082130432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08110334724187851, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07755900919437408, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08951479941606522, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08289916813373566, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08204706013202667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07346872240304947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07054280489683151, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04551559314131737, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03960885480046272, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03880663216114044, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03861119970679283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.022730909287929535, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.019936272874474525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.019885703921318054, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018387261778116226, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018358727917075157, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011899752542376518, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.01196693629026413, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011634329333901405, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007920894771814346, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.43.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.2037385255098343, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.19239549338817596, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.18901605904102325, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.17300093173980713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09549640119075775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.09133382886648178, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.10546663403511047, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09759271144866943, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09659804403781891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08651486039161682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.08308583498001099, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.05364269018173218, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.046586621552705765, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04564252495765686, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04541878029704094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.026763716712594032, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.023303525522351265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.023241931572556496, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.021459195762872696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.021425269544124603, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.013966115191578865, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.013759267516434193, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.013656489551067352, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.008804942481219769, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.43.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.1899334341287613, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.16958415508270264, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.15920397639274597, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.1434420347213745, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.0858917385339737, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.07729460299015045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10583654791116714, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09683693945407867, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.08905953168869019, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07509534806013107, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07185114920139313, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.05359182879328728, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04630995914340019, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04143460467457771, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04022626206278801, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.027141766622662544, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.021952839568257332, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.021404238417744637, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.019882991909980774, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.0191270150244236, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.015075367875397205, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.014756358228623867, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013597408309578896, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.010457361117005348, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.44.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.0740378201007843, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.06937071681022644, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06741149723529816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06117526441812515, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03450518846511841, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03252134472131729, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03908439725637436, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03630157187581062, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.034961629658937454, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03093293495476246, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.029578885063529015, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.01983049139380455, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017348475754261017, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.016541101038455963, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.016346726566553116, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.00991728249937296, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008553893305361271, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008471034467220306, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007838483899831772, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007758732885122299, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005220533348619938, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0053034317679703236, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004975355230271816, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0035690166987478733, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.44.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.0716807097196579, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.0671810582280159, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.0649675577878952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05887702852487564, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.033151086419820786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.031087659299373627, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.038231655955314636, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03544456884264946, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.033565204590559006, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.029745182022452354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02847781591117382, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.019240353256464005, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01684991829097271, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.015842251479625702, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.015591501258313656, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009629786014556885, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.00811126921325922, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007998374290764332, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007425783202052116, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.007309458684176207, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.005037740804255009, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004958161152899265, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004734287969768047, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.003151434939354658, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.44.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.1971537321805954, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1850043386220932, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1812799721956253, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16462697088718414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09161647409200668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08715178817510605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10209982097148895, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09406229108572006, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09274696558713913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08233711123466492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07875083386898041, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05167822167277336, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04482031613588333, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.04367456212639809, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.0434122309088707, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.025781573727726936, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02208854630589485, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.022000305354595184, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020150616765022278, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020093031227588654, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013195602223277092, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.01275248359888792, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012753808870911598, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0076590050011873245, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.44.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.15045736730098724, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.12101967632770538, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.11182985454797745, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.08211290836334229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.0674871951341629, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.056632764637470245, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.07942172884941101, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.07256252318620682, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.07021673023700714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.0464906208217144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.04076831415295601, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.04076375439763069, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.03522304445505142, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.03297223895788193, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.032429929822683334, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.020739372819662094, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.01819680631160736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.01806289702653885, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.014525149017572403, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.014328238554298878, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.011931865476071835, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01282014325261116, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.011276012286543846, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.010191903449594975, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.44.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17159757018089294, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16205115616321564, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.15920811891555786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.14571090042591095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08051387220621109, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07700563222169876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08888066560029984, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08227944374084473, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08142106235027313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.07292629033327103, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07003861665725708, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04520268365740776, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03932138904929161, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03852416202425957, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03833943232893944, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.02258053421974182, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.019822360947728157, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.019770024344325066, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018284695222973824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018256105482578278, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.011857669800519943, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011931953951716423, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011600660160183907, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007944074459373951, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.44.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.1928544044494629, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.18211229145526886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.17889295518398285, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.16377444565296173, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.09039498120546341, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.08645152300596237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09974972158670425, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.09235461801290512, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.09142839908599854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.08187413215637207, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07862738519906998, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.050736475735902786, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04409059137105942, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.04320124164223671, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.04299478605389595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.025310739874839783, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.022075070068240166, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.02201772853732109, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.02033592388033867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.020305445417761803, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.013195471838116646, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.013064034283161163, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.01290681678801775, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00839958619326353, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.44.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.18781797587871552, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.16682779788970947, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.1565038114786148, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.14041243493556976, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.08513228595256805, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.07632175087928772, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.10435081273317337, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.09566089510917664, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.08821086585521698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.07379905879497528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.07038452476263046, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.052998874336481094, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.045913949608802795, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.04125584661960602, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.04010371118783951, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.026684598997235298, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.02218789979815483, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.021667569875717163, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.02006666548550129, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.019352832809090614, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.014654548838734627, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.015268723480403423, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.013205014169216156, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.01129269041121006, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.45.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.08122590184211731, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07615917176008224, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.07413984835147858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.06736749410629272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.03788357973098755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03577357903122902, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04275951534509659, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.03967423364520073, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03837771713733673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03400470316410065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03252327814698219, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.021681148558855057, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.01894981414079666, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01814500242471695, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.017952008172869682, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010848390869796276, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.009357258677482605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.009277335368096828, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008574740961194038, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.008500293828547001, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.00569531787186861, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0057435291819274426, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005447127856314182, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0038181254640221596, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.45.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.06811952590942383, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06385388970375061, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.06158868968486786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05586927756667137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.03149442747235298, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.029473062604665756, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03658318892121315, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03383049741387367, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03191908821463585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.028268933296203613, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02711441181600094, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01843457855284214, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.01610466092824936, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.015067718923091888, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01481259148567915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.009220121428370476, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0077255186624825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.007600090932101011, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.007069381419569254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006941850297152996, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004831516183912754, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004746241495013237, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004508320242166519, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0030192642007023096, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.45.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.19727258384227753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.1851198375225067, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18137244880199432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16473811864852905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09155672043561935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08715302497148514, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10192341357469559, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09396466612815857, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09270063042640686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08220326155424118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.07860128581523895, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.05158885940909386, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.04472864419221878, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.043642789125442505, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04339473694562912, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02573104202747345, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02208278514444828, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.02200072444975376, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020139480009675026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020091485232114792, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.01317683793604374, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.012769632041454315, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.012752393260598183, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.0077106282114982605, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.45.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.1219162866473198, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.09715104848146439, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.0885174423456192, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.07010376453399658, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.053621914237737656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.043868303298950195, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.0656866803765297, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.06008152663707733, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.05707341060042381, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.039808690547943115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03593657165765762, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.03382967412471771, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.029336685314774513, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.026470627635717392, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.025760797783732414, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01729610003530979, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.014977026730775833, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.014802737161517143, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.01262015663087368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.01228022575378418, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.010111323557794094, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01107796747237444, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.009311607107520103, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.009021884761750698, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.45.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.16072095930576324, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.15178076922893524, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1490999460220337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.13649173080921173, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.07537541538476944, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.07210157811641693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.08325442671775818, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07700595259666443, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07623455673456192, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06829073280096054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.0656302198767662, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04236855357885361, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.036819782108068466, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03607770800590515, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03589978814125061, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.021186329424381256, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.018571004271507263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.018521562218666077, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.017137126997113228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01711321249604225, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.01118431519716978, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011199207976460457, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010941911488771439, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007482762448489666, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.45.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.17710277438163757, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.16723133623600006, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.1642824411392212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.15037012100219727, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.08296354115009308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.0793590098619461, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.09162505716085434, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.08476068079471588, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.08390854299068451, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.0751626193523407, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.07217643409967422, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.04659532755613327, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.04046036675572395, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03963892161846161, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.039447568356990814, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.023250935599207878, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.020237039774656296, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.020183339715003967, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.018638990819454193, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.018612369894981384, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.012136505916714668, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.011947310529649258, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.011869528330862522, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.00764281302690506, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.45.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.16370591521263123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.1442008763551712, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.13502605259418488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.12107186019420624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.0741540864109993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.06610953062772751, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.09150239080190659, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.08325280249118805, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.07700865715742111, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.06389768421649933, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.060978274792432785, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.04648623988032341, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.04026801139116287, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.03620486333966255, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.03518807888031006, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.02372189238667488, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.0199083611369133, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.01947752758860588, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.01805257610976696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.01745227351784706, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.013466108590364456, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.014182005077600479, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.012261239811778069, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.011016529984772205, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.46.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.07620621472597122, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.07153704017400742, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.06958190351724625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.0632137805223465, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.035508207976818085, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03354789689183235, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.04014992341399193, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.037279870361089706, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.03596772253513336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03189888224005699, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03049507364630699, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.020342495292425156, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.017796142026782036, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01700521446764469, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.0168151818215847, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.010177011601626873, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008769857697188854, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008687549270689487, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.008041300810873508, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.00796374399214983, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005347545258700848, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.00538830179721117, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005107926670461893, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0035774465650320053, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.46.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.0660732090473175, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.06197810545563698, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.0597565583884716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.05423944443464279, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.030616194009780884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.028626546263694763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.03566398844122887, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.03300457075238228, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.03100941888988018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.0275226179510355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.026399865746498108, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.01798660308122635, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.015731239691376686, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.014647905714809895, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.01437852717936039, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.00900261290371418, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.0075080213136971, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.00738164409995079, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.006879307795315981, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.006755112204700708, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.004715598188340664, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.004627385176718235, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.004397721029818058, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002955968026071787, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.46.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.20132572948932648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.18903246521949768, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.18527694046497345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.16819000244140625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.09360156953334808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.08908993005752563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.10434021800756454, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.09599523991346359, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.09478282928466797, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.08408097177743912, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.08037418872117996, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.052796147763729095, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.045675989240407944, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.044608570635318756, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.04434672370553017, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.026319380849599838, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.02257952280342579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.022501492872834206, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.020602179691195488, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.020562252029776573, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.013481742702424526, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.013051110319793224, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.01306246593594551, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.00789499282836914, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.46.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.10621154308319092, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.0812925472855568, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.07281466573476791, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.060818079859018326, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.046036314219236374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.03657422959804535, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.05765695124864578, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.05217278003692627, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.04946470633149147, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.03368628770112991, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.03228157386183739, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.02949964627623558, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.02555881254374981, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.02289009839296341, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.022228633984923363, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.01523031760007143, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.013282041065394878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.013123827986419201, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.011279590427875519, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.010927775874733925, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.009135028347373009, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.01014423742890358, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.00839939247816801, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.00846779439598322, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.46.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.14015153050422668, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.1323409527540207, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.13000278174877167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.11905592679977417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.0656718835234642, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.062834233045578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.07259874790906906, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.06710806488990784, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.06641023606061935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.059527792036533356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.05723440274596214, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.03696561977267265, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.03209502622485161, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03145311772823334, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03130163997411728, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.01849239319562912, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.016231190413236618, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.016191771253943443, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.014996695332229137, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.014980546198785305, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.009785309433937073, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.009846551343798637, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.009574109688401222, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.006659874692559242, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.46.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.14821244776248932, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.140030175447464, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.13755637407302856, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.12596724927425385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.06947876513004303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.06648880243301392, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.07678212970495224, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.07099209725856781, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.07027729600667953, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.06298745423555374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.060570187866687775, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.03914012014865875, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.033937085419893265, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.03326183930039406, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.03310265764594078, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.01955859363079071, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01715407520532608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.017111394554376602, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.015847232192754745, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.015824781730771065, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.010368432849645615, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.010393318720161915, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.010147030465304852, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007007971405982971, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.46.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.07169270515441895, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.062172580510377884, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.057375166565179825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.05139539763331413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.032766010612249374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.028541199862957, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.041986916214227676, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.037724945694208145, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.03420909866690636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.02787286788225174, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.026770081371068954, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.021540774032473564, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.01849427819252014, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.016273988410830498, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.01570732891559601, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.01106803398579359, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.009296320378780365, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.009057464078068733, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.008448723703622818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.008121324703097343, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.006451315246522427, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.00701637240126729, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.005755383521318436, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.005715355277061462, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.47.self_attn.q_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.0628475472331047, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.058964285999536514, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.05712046101689339, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.05186180770397186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.029227081686258316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.027494776993989944, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.03361417353153229, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.031038664281368256, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.029598886147141457, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.02625223435461521, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.025150708854198456, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.01705317758023739, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.014844361692667007, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.01402717549353838, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.013832703232765198, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.00855647586286068, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.0073074232786893845, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.007214685436338186, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.006715994793921709, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.006626582704484463, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.004570907913148403, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.004619141109287739, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.004332457669079304, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.0031761920545250177, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.47.self_attn.k_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.05538786202669144, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.051930151879787445, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.04996299743652344, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.045228954404592514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.02557997591793537, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.02386043220758438, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.030072206631302834, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.02781542018055916, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.025912845507264137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.02297106385231018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.02201518975198269, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.015154501423239708, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.013253134675323963, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.012255435809493065, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.012007983401417732, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.0075943684205412865, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.006310104392468929, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.0061922515742480755, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.0057867420837283134, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.005659672897309065, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.00401114160194993, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.003945988602936268, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.0037232409231364727, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.002560037886723876, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.47.self_attn.v_proj", - "numel": 8388608, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17333984375, - "total_bits": 18231296.0, - "err": 0.169625386595726, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 19935232.0, - "err": 0.15914106369018555, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 22032384.0, - "err": 0.1557915210723877, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 22818816.0, - "err": 0.1413142830133438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 27013120.0, - "err": 0.07883954793214798, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73583984375, - "total_bits": 31338496.0, - "err": 0.07494884729385376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 25431040.0, - "err": 0.08827278017997742, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 26226688.0, - "err": 0.08116824924945831, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17333984375, - "total_bits": 26619904.0, - "err": 0.07982224971055984, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 29634560.0, - "err": 0.07073532044887543, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 30676992.0, - "err": 0.06767643988132477, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 33819648.0, - "err": 0.04473861679434776, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 34615296.0, - "err": 0.03866826742887497, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 35401728.0, - "err": 0.03763453662395477, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 36188160.0, - "err": 0.03739234432578087, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 42208256.0, - "err": 0.02233724668622017, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 43790336.0, - "err": 0.01908668875694275, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31396484375, - "total_bits": 44576768.0, - "err": 0.019006382673978806, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 46411776.0, - "err": 0.01741742342710495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70458984375, - "total_bits": 47853568.0, - "err": 0.017351429909467697, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 50596864.0, - "err": 0.011494233272969723, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 51392512.0, - "err": 0.011142772622406483, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 52169728.0, - "err": 0.011098033748567104, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 68169728.0, - "err": 0.006839927285909653, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.47.self_attn.o_proj", - "numel": 67108864, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17205810546875, - "total_bits": 145764352.0, - "err": 0.05817507952451706, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 159395840.0, - "err": 0.04448354244232178, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 176173056.0, - "err": 0.040092386305332184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.71893310546875, - "total_bits": 182464512.0, - "err": 0.03300068899989128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.21893310546875, - "total_bits": 216018944.0, - "err": 0.025632528588175774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.73455810546875, - "total_bits": 250621952.0, - "err": 0.020192869007587433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 203426816.0, - "err": 0.031630247831344604, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 209727488.0, - "err": 0.02817286178469658, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17205810546875, - "total_bits": 212873216.0, - "err": 0.027189701795578003, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53143310546875, - "total_bits": 236990464.0, - "err": 0.01853911578655243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656341552734375, - "total_bits": 245372928.0, - "err": 0.017255699262022972, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 270535680.0, - "err": 0.016371360048651695, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 276836352.0, - "err": 0.014323916286230087, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.21893310546875, - "total_bits": 283127808.0, - "err": 0.013247711583971977, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31268310546875, - "total_bits": 289419264.0, - "err": 0.012985733337700367, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 337644544.0, - "err": 0.00886866357177496, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.21893310546875, - "total_bits": 350236672.0, - "err": 0.008328522555530071, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.31268310546875, - "total_bits": 356528128.0, - "err": 0.008275343105196953, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53143310546875, - "total_bits": 371208192.0, - "err": 0.007316709030419588, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.70330810546875, - "total_bits": 382742528.0, - "err": 0.007207045331597328, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 404753408.0, - "err": 0.005843159276992083, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 411054080.0, - "err": 0.0068213059566915035, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2187957763671875, - "total_bits": 417336320.0, - "err": 0.005578945856541395, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 545271808.0, - "err": 0.006126216147094965, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.47.mlp.gate_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.11569724977016449, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.10913042724132538, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.10712630301713943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.09781589359045029, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.054275792092084885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.05188693478703499, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.060044221580028534, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.05552845448255539, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.05491342023015022, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.049120426177978516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.04721268266439438, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.030796369537711143, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.026831338182091713, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.02628123015165329, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.026148810982704163, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.015432953834533691, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01408111210912466, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.01404702477157116, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.013102116994559765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.013083886355161667, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.00847124494612217, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.009276188910007477, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.008299808949232101, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.007095305249094963, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.47.mlp.up_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.171943132267442, - "total_bits": 391720960.00000006, - "err": 0.10544721782207489, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375068132267442, - "total_bits": 428355584.00000006, - "err": 0.09939799457788467, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625068132267442, - "total_bits": 473444352.00000006, - "err": 0.09755146503448486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.718818132267442, - "total_bits": 490352640.00000006, - "err": 0.08907599747180939, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.218818132267442, - "total_bits": 580530176.0, - "err": 0.04951753467321396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.734443132267442, - "total_bits": 673525760.0, - "err": 0.047318071126937866, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312670330668605, - "total_bits": 546704384.0, - "err": 0.05491308867931366, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125068132267442, - "total_bits": 563621888.0, - "err": 0.05066464841365814, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.171943132267442, - "total_bits": 572076032.0, - "err": 0.05010978505015373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.531318132267442, - "total_bits": 636891136.0, - "err": 0.04480338469147682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656284066133721, - "total_bits": 659429376.0, - "err": 0.043058209121227264, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03126703306686, - "total_bits": 727059455.9999999, - "err": 0.028164274990558624, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125068132267442, - "total_bits": 743976960.0, - "err": 0.024547725915908813, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218818132267442, - "total_bits": 760885248.0, - "err": 0.02403636835515499, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312568132267442, - "total_bits": 777793536.0, - "err": 0.0239180326461792, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03126703306686, - "total_bits": 907414527.9999999, - "err": 0.01415753923356533, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218818132267442, - "total_bits": 941240320.0, - "err": 0.01298424694687128, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.312568132267442, - "total_bits": 958148608.0, - "err": 0.012956283055245876, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531318132267442, - "total_bits": 997601280.0, - "err": 0.012100578285753727, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.703193132267442, - "total_bits": 1028599808.0, - "err": 0.01208354253321886, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03126703306686, - "total_bits": 1087769600.0, - "err": 0.007874474860727787, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125068132267442, - "total_bits": 1104687104.0, - "err": 0.008678227663040161, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.21876703306686, - "total_bits": 1121586176.0, - "err": 0.007716414052993059, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125068132267442, - "total_bits": 1465397248.0, - "err": 0.006762949284166098, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.47.mlp.down_proj", - "numel": 180355072, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.177508686864099, - "total_bits": 392724736.0, - "err": 0.048986971378326416, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37518310546875, - "total_bits": 428376320.0, - "err": 0.04131540283560753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62518310546875, - "total_bits": 473465088.0, - "err": 0.03597965091466904, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7240203147710758, - "total_bits": 491290880.00000006, - "err": 0.03221781179308891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2240203147710758, - "total_bits": 581468416.0, - "err": 0.02242922969162464, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7182063612827037, - "total_bits": 670597376.0, - "err": 0.018008962273597717, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0312957763671875, - "total_bits": 546709568.0, - "err": 0.03316837176680565, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12518310546875, - "total_bits": 563642624.0, - "err": 0.02836783230304718, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.177508686864099, - "total_bits": 573079808.0, - "err": 0.02375127002596855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5263458961664242, - "total_bits": 635994368.0, - "err": 0.019231094047427177, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6614287620367008, - "total_bits": 660357248.0, - "err": 0.01880745030939579, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0312957763671875, - "total_bits": 727064640.0, - "err": 0.01661391369998455, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12518310546875, - "total_bits": 743997696.0, - "err": 0.014724506065249443, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.224020314771075, - "total_bits": 761823488.0, - "err": 0.0117185078561306, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3228575240734015, - "total_bits": 779649280.0000001, - "err": 0.010939497500658035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0312957763671875, - "total_bits": 907419712.0, - "err": 0.008831150829792023, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.224020314771075, - "total_bits": 942178560.0, - "err": 0.007524398621171713, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.334485431050146, - "total_bits": 962101504.0000001, - "err": 0.007172654382884502, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.526345896166425, - "total_bits": 996704512.0, - "err": 0.007059161085635424, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.724020314771075, - "total_bits": 1032356096.0, - "err": 0.0066296071745455265, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0312957763671875, - "total_bits": 1087774784.0, - "err": 0.005494844634085894, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12518310546875, - "total_bits": 1104707840.0, - "err": 0.006482085678726435, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.228970194971839, - "total_bits": 1123426368.0, - "err": 0.004614193458110094, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12518310546875, - "total_bits": 1465417984.0, - "err": 0.005634877365082502, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - } - ], - "last_module_idx": 98, - "base_perplexity": 8.317859772853494 + "measurement": { + "model.layers.0.self_attn": [ + { + "accuracy": 0.9800930023193359, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827747941017151, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853895306587219, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896157383918762, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909340143203735, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919008016586304, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992597758769989, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937791228294373, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942294955253601, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943109750747681, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995823323726654, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959726929664612, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961144924163818, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962584972381592, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974860548973083, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997322678565979, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976769089698792, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975791573524475, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998449444770813, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.0.mlp": [ + { + "accuracy": 0.9635669589042664, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.964300274848938, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747369885444641, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803199768066406, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822068214416504, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833824038505554, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882259964942932, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891918897628784, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901768565177917, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905440211296082, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912118911743164, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941856265068054, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939609169960022, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952983260154724, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958770275115967, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963116645812988, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963451027870178, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.9441835880279541, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9488452076911926, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9602051973342896, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9713906645774841, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731467366218567, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742546677589417, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776365160942078, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787800312042236, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787396788597107, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979019045829773, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868310689926147, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868574738502502, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878042936325073, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875671863555908, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929231405258179, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99301677942276, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935852289199829, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994951069355011, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964395761489868, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9497435688972473, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9514597654342651, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9636044502258301, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9674468040466309, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773848056793213, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793245196342468, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848370552062988, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879286885261536, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888824224472046, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887081384658813, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896802306175232, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993708074092865, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938643574714661, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956102967262268, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953345060348511, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958858489990234, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996078372001648, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9304672479629517, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9340604543685913, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9444915056228638, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9591675996780396, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9627346992492676, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638285636901855, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699957966804504, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9712735414505005, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760041832923889, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765028357505798, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982394278049469, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983982264995575, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839705228805542, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857084155082703, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913151264190674, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918387532234192, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992512583732605, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949471354484558, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967120885848999, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.9306711554527283, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9317218065261841, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9494115114212036, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9546176791191101, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668757319450378, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694381952285767, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9766857028007507, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831506609916687, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848632216453552, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833409786224365, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854127764701843, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915655851364136, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923242926597595, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951876401901245, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953710436820984, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964795112609863, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973978996276855, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9332058429718018, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9356499910354614, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9488872289657593, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9618580937385559, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9650678038597107, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9677634239196777, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738625288009644, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9771599769592285, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980128288269043, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805722236633301, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983823299407959, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852139949798584, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859377145767212, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873632788658142, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920437335968018, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928047060966492, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931376576423645, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994867205619812, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967489838600159, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.956680953502655, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9585724472999573, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9635801315307617, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9646449089050293, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980283260345459, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818865656852722, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834438562393188, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848878979682922, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847450852394104, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873768091201782, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859433174133301, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99040287733078, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893063902854919, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908559322357178, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905834197998047, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906701445579529, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909031987190247, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9807156920433044, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981155514717102, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824636578559875, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854095578193665, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889683127403259, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894711971282959, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934898614883423, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938023090362549, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945218563079834, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946421980857849, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942536354064941, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946406483650208, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960229396820068, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962509870529175, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977087378501892, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979222416877747, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980072975158691, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985348582267761, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988486170768738, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.9739459753036499, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745151400566101, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777454733848572, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97867751121521, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906159043312073, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913169145584106, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930819869041443, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950442910194397, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955501556396484, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951643943786621, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955083727836609, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974499940872192, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997631311416626, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984092712402344, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984688758850098, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987096190452576, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989203810691833, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.9861748814582825, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986945390701294, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888960123062134, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913512468338013, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924433827400208, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923872351646423, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952816963195801, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954729676246643, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995924174785614, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960877895355225, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959037899971008, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962701201438904, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970390796661377, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973379969596863, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983842372894287, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985257387161255, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986811280250549, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990169405937195, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993733763694763, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.9773525595664978, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.977837860584259, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823727607727051, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837546348571777, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888952970504761, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897480607032776, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916398525238037, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942992925643921, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948745965957642, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943763017654419, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995095431804657, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971676468849182, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997430145740509, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983984231948853, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984768629074097, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998780369758606, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992048144340515, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.9830384254455566, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838992953300476, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860707521438599, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898049831390381, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910503625869751, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913862943649292, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944864511489868, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949245452880859, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954713582992554, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956619143486023, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954796433448792, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958842992782593, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965503215789795, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968291521072388, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980463981628418, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982395172119141, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985117316246033, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998806357383728, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992446303367615, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9723787307739258, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9729722738265991, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783775806427002, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800164103507996, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864199757575989, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987464189529419, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897198677062988, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930344223976135, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937321543693542, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931177496910095, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940007925033569, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965372085571289, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968603849411011, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980488419532776, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981374740600586, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984995722770691, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990307688713074, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.9796698689460754, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806964993476868, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844582676887512, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988379716873169, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896323084831238, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901329278945923, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930344820022583, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939324855804443, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945297837257385, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99472975730896, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947519898414612, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952683448791504, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996064305305481, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964129328727722, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977646470069885, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980430006980896, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981924295425415, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986841678619385, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999193549156189, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.9674018621444702, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681084156036377, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743604063987732, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762453436851501, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839526414871216, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851893782615662, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877821803092957, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917733669281006, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925855994224548, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918639659881592, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929053783416748, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959049224853516, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962823390960693, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976913928985596, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977872371673584, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982010722160339, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988346695899963, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.9752789735794067, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.975614607334137, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790194630622864, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842453598976135, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870651364326477, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987440824508667, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920685887336731, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927417635917664, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933977127075195, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936566352844238, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934083819389343, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939486980438232, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948147535324097, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952362775802612, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971776008605957, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974847435951233, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978929162025452, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983791708946228, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990546703338623, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.9620954990386963, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9629386067390442, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9700493216514587, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721861481666565, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813472032546997, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827811121940613, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857302904129028, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904528856277466, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913877248764038, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990551769733429, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917629361152649, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952481985092163, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957076907157898, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997334361076355, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974587559700012, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997934877872467, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987100958824158, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.970201849937439, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726364016532898, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772294163703918, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830317497253418, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851276278495789, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859434366226196, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99103182554245, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919366240501404, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928060173988342, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930648803710938, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929361343383789, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933927655220032, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945298433303833, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949636459350586, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969095587730408, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972683191299438, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976581335067749, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998167097568512, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989244937896729, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9573321342468262, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9582739472389221, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9662586450576782, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.968643307685852, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789685606956482, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980595588684082, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838851690292358, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989219069480896, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902729392051697, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893350005149841, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99069744348526, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994619607925415, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951197504997253, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969539642333984, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971039295196533, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976291060447693, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998486340045929, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9663341045379639, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9663162231445312, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725334644317627, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787842631340027, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832478761672974, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836918115615845, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903167486190796, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909451603889465, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917513132095337, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921988844871521, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917721748352051, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925121665000916, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935699701309204, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941157102584839, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964207410812378, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967845678329468, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997390627861023, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977889657020569, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986574053764343, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.9532442688941956, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9543027281761169, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9628568291664124, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9654065370559692, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9769850969314575, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787513017654419, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982261598110199, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881941676139832, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893389344215393, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883214831352234, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98981112241745, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941027164459229, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946494102478027, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966486692428589, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968208074569702, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973794221878052, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983254671096802, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9604716897010803, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9617511034011841, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.968413233757019, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765810370445251, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980535089969635, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811338782310486, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988677442073822, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892933964729309, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902457594871521, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906780123710632, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902089834213257, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910175204277039, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923781752586365, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929957389831543, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957993030548096, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962096810340881, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969737529754639, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974725246429443, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985476136207581, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.9491015672683716, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9502574801445007, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9597464203834534, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9625500440597534, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749575257301331, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768937230110168, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807647466659546, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871600270271301, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884147047996521, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987290620803833, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889194965362549, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935898184776306, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941516518592834, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963613152503967, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965381622314453, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971531629562378, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981698393821716, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9638631939888, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9642468690872192, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705459475517273, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780733585357666, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820680618286133, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825600981712341, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889926314353943, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898890852928162, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909480214118958, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913040399551392, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911235570907593, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919421076774597, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928687810897827, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934417605400085, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996062159538269, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964714646339417, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971629977226257, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976528286933899, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986501932144165, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.9447681307792664, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.946013867855072, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9561264514923096, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9591128826141357, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.972824215888977, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749164581298828, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790410995483398, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860776662826538, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874228835105896, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862100481987, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879764318466187, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930555820465088, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936918020248413, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960737824440002, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962615966796875, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969205260276794, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980535507202148, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9588255882263184, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9597347378730774, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9666991829872131, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742264151573181, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797823429107666, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801427721977234, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883213639259338, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989088773727417, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901308417320251, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905831813812256, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901182651519775, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907715320587158, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921107292175293, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927554130554199, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956770539283752, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960997104644775, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968166947364807, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974351525306702, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998357355594635, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.9410810470581055, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9424535632133484, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9530321359634399, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9561689496040344, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709913730621338, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732133746147156, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775407910346985, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851484298706055, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865684509277344, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852891564369202, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871681928634644, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925965666770935, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932898283004761, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99583500623703, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960266947746277, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967208504676819, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979627132415771, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9585748910903931, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.959814727306366, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9667726159095764, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746048450469971, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795907735824585, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802403450012207, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880690574645996, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886711239814758, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989996612071991, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904159307479858, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901021718978882, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909869432449341, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920281171798706, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927430152893066, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956445097923279, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960619807243347, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969175457954407, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974275231361389, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998556911945343, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.9379606246948242, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9393935203552246, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9504369497299194, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9537109732627869, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694175720214844, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717757701873779, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762751460075378, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843055605888367, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858224391937256, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844751954078674, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864628314971924, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921694397926331, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929004907608032, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955599904060364, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957985281944275, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965211153030396, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978247284889221, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.9581625461578369, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9597067832946777, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.96772301197052, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747770428657532, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97945237159729, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802442789077759, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876974821090698, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886754155158997, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898871183395386, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902554154396057, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900950789451599, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909392595291138, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921430945396423, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927928447723389, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956232905387878, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961509108543396, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967575669288635, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974329471588135, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984924793243408, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.9361906051635742, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9376870393753052, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9488378167152405, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9521490335464478, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9685388803482056, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709659814834595, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755187630653381, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838749766349792, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854190945625305, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840456247329712, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860830903053284, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919641613960266, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992714524269104, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954643845558167, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956595301628113, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996385931968689, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99772709608078, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9568060040473938, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9581800103187561, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9648329019546509, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724477529525757, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781988263130188, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788833260536194, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872744083404541, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880862236022949, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893022179603577, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897257089614868, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894102215766907, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903362989425659, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915648102760315, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922764301300049, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953945875167847, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958482384681702, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967045783996582, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972475171089172, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984207153320312, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.933670163154602, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9352047443389893, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9466569423675537, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9500972032546997, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9672523140907288, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.96976238489151, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9744793176651001, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831913113594055, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848037362098694, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833864569664001, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854975938796997, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916146993637085, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923990964889526, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952470660209656, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954919219017029, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962462186813354, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976567029953003, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.959131121635437, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9608429670333862, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9662474393844604, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743590950965881, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795129299163818, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801461696624756, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874228239059448, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883750677108765, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898442029953003, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903357625007629, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901219010353088, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908755421638489, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918085336685181, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925777912139893, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954468011856079, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960154891014099, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967064261436462, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972922801971436, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984549283981323, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.9318019151687622, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9333848357200623, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.945061981678009, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9485942721366882, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9663283824920654, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9689136147499084, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9737256765365601, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827081561088562, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843688607215881, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829200506210327, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850930571556091, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913855791091919, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921860098838806, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951237440109253, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953452944755554, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961119890213013, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975478053092957, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.9610167741775513, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624461531639099, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9678136706352234, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753912687301636, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808831810951233, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814605712890625, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883527159690857, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989033579826355, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901847839355469, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905783534049988, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904896020889282, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913077354431152, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920714497566223, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992831289768219, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995674192905426, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961156845092773, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968946576118469, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973107576370239, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985576868057251, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.9289742112159729, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9305921196937561, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9427095055580139, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9463954567909241, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649171829223633, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676034450531006, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726067781448364, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819508790969849, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836809635162354, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821920394897461, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844500422477722, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910052418708801, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918289184570312, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948786497116089, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951733946800232, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959771633148193, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974870681762695, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9592658281326294, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9609959125518799, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9661493897438049, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9741843938827515, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795964956283569, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799654483795166, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874234199523926, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877415299415588, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895380735397339, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900311231613159, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898573160171509, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907590746879578, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991531491279602, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923195838928223, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953656792640686, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957922101020813, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966760873794556, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971663951873779, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982833862304688, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.9279268383979797, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9295452833175659, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9418519139289856, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9456520080566406, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9643076658248901, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670325517654419, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721567034721375, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815253615379333, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833051562309265, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818213582038879, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841241240501404, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907402396202087, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915408492088318, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945903420448303, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948631525039673, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956478476524353, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970240592956543, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.9558507800102234, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9580075740814209, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9639827013015747, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726521968841553, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780781865119934, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787028431892395, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870149493217468, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874838590621948, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890743494033813, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896116852760315, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893556237220764, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990132212638855, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910963773727417, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917925596237183, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952100515365601, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957480430603027, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965659976005554, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971890449523926, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983456134796143, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.9274551272392273, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9290608763694763, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9414796829223633, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9453130960464478, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9640334844589233, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668107628822327, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9720117449760437, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814985990524292, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833067655563354, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817559719085693, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840872883796692, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907976388931274, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916784167289734, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947901964187622, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950611591339111, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958993792533875, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974404573440552, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9567034840583801, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.958748996257782, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9639908671379089, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.973642885684967, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788212180137634, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791392087936401, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871401190757751, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875941276550293, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890919923782349, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896222352981567, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895391464233398, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903624653816223, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911293387413025, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919131398200989, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951489567756653, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956391453742981, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965286254882812, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970330595970154, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982517957687378, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.9262429475784302, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9278771877288818, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9403714537620544, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9442184567451477, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9634274840354919, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9662408232688904, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9714785814285278, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811845421791077, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830218553543091, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814318418502808, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983805775642395, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906305074691772, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915274977684021, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946848750114441, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949420094490051, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957783818244934, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973343014717102, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.22.self_attn": [ + { + "accuracy": 0.964180588722229, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.965822160243988, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.970266580581665, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776046276092529, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818762540817261, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982368528842926, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888126850128174, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894089698791504, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908254742622375, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912506341934204, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910248517990112, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917961955070496, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925424456596375, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933213591575623, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959163665771484, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964348077774048, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970038533210754, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976601600646973, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986461400985718, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.22.mlp": [ + { + "accuracy": 0.9264614582061768, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9281120896339417, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9402446746826172, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.943993866443634, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9634973406791687, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966299295425415, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.971410870552063, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981220006942749, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830453395843506, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814749360084534, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838348627090454, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906554222106934, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915553331375122, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947212934494019, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950163960456848, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958446025848389, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974684715270996, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.23.self_attn": [ + { + "accuracy": 0.9672321677207947, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681163430213928, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726198315620422, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979219377040863, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830436706542969, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835019707679749, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989208459854126, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989604115486145, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911029934883118, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991540253162384, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915454983711243, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923515915870667, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930952787399292, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937841892242432, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961169362068176, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965517520904541, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996988832950592, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975231885910034, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984544515609741, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.23.mlp": [ + { + "accuracy": 0.9283046126365662, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9299112558364868, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9416734576225281, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9452886581420898, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9643793106079102, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9671559929847717, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9720916152000427, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981707751750946, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834961295127869, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819230437278748, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984260618686676, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909000992774963, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918037056922913, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948866367340088, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951176047325134, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959119558334351, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974939823150635, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.24.self_attn": [ + { + "accuracy": 0.9653584957122803, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966646134853363, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9712920188903809, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787885546684265, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825012683868408, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98296058177948, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891852140426636, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897653460502625, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908915758132935, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913374781608582, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912213683128357, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919354915618896, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927515387535095, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934023022651672, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960042834281921, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964162707328796, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997079074382782, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975471496582031, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985069036483765, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.24.mlp": [ + { + "accuracy": 0.929088830947876, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9306501150131226, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9420560598373413, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9455745816230774, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9647427797317505, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9674885869026184, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722828269004822, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818928241729736, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836512207984924, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982113242149353, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844096302986145, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99098801612854, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991875946521759, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949254989624023, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952046871185303, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959834218025208, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975930452346802, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.25.self_attn": [ + { + "accuracy": 0.9655715227127075, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668852090835571, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718791842460632, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784544706344604, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827254414558411, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832611083984375, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893888235092163, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898439049720764, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909557700157166, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913544654846191, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914355874061584, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921239018440247, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929906129837036, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936841130256653, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961917400360107, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966323971748352, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972003102302551, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978347420692444, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988068342208862, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.25.mlp": [ + { + "accuracy": 0.9294280409812927, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9310038089752197, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9421603679656982, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9456098079681396, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649169445037842, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676299095153809, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9723220467567444, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819887280464172, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837225079536438, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822164177894592, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844849109649658, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910404682159424, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919237494468689, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949697256088257, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952052235603333, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995963454246521, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975623488426208, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.26.self_attn": [ + { + "accuracy": 0.9677987694740295, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9690551161766052, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734504222869873, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801328778266907, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984133780002594, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844918251037598, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904332756996155, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907654523849487, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917294979095459, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921165704727173, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921057820320129, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926771521568298, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933891892433167, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939525723457336, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963516592979431, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966714382171631, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973614811897278, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977279305458069, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985732436180115, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.26.mlp": [ + { + "accuracy": 0.9302108287811279, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9317727088928223, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9426621198654175, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9460402727127075, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9652842879295349, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679799675941467, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725514054298401, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821592569351196, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838862419128418, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823821187019348, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846451878547668, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911174178123474, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920048117637634, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949945211410522, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952415823936462, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959794878959656, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975627660751343, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.27.self_attn": [ + { + "accuracy": 0.9645341038703918, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.965753972530365, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9707216620445251, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782591462135315, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982609212398529, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829657077789307, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897640347480774, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901759028434753, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991071879863739, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915483593940735, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913859963417053, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920276999473572, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992912232875824, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935011267662048, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996126651763916, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965032935142517, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972699284553528, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976511597633362, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986923336982727, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.27.mlp": [ + { + "accuracy": 0.9305377006530762, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9320939183235168, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9430216550827026, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9464213848114014, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9654381275177002, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681191444396973, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9727216362953186, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822217226028442, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839528203010559, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824665188789368, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847126007080078, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911554455757141, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920283555984497, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950027465820312, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952920079231262, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960398077964783, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997623860836029, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.28.self_attn": [ + { + "accuracy": 0.9722875952720642, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9740546345710754, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784924983978271, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838358163833618, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864665269851685, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868291616439819, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913133978843689, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917309284210205, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926469326019287, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929615259170532, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933212995529175, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938680529594421, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946563243865967, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951145052909851, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997012734413147, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973286986351013, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976455569267273, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981006383895874, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989246726036072, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.28.mlp": [ + { + "accuracy": 0.9309771656990051, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9325085282325745, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9433149695396423, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9466583132743835, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.965662956237793, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9683294892311096, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728586673736572, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823333024978638, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840366244316101, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825677871704102, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847903251647949, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911661148071289, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920333027839661, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949563145637512, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995277464389801, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960044622421265, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975523352622986, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.29.self_attn": [ + { + "accuracy": 0.9697163105010986, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709169864654541, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749261140823364, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981679379940033, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850536584854126, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855064749717712, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908041954040527, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912158846855164, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991916298866272, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923673272132874, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992623507976532, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931619167327881, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938792586326599, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943373203277588, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965323209762573, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968374967575073, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974081516265869, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977153539657593, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985132217407227, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.29.mlp": [ + { + "accuracy": 0.9334163665771484, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9348925352096558, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9451784491539001, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9483669400215149, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668887853622437, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694380760192871, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9737595319747925, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829493761062622, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845852851867676, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983183741569519, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.985313892364502, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914677143096924, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923008680343628, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951154589653015, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953584671020508, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960381984710693, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974756836891174, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.30.self_attn": [ + { + "accuracy": 0.9708197712898254, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715149998664856, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758592247962952, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809736013412476, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856275320053101, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859455823898315, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916478991508484, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919296503067017, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925447106361389, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928755164146423, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929220080375671, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935187101364136, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941959977149963, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994614839553833, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967489838600159, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99698805809021, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976038336753845, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978833198547363, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986225962638855, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.30.mlp": [ + { + "accuracy": 0.9353439807891846, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9367631673812866, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9468156099319458, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9498594403266907, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9678400754928589, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9703398942947388, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745261669158936, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835060834884644, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850995540618896, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836717247962952, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857708811759949, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917500615119934, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992579460144043, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953340888023376, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955409169197083, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962042570114136, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976480603218079, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.31.self_attn": [ + { + "accuracy": 0.9766544699668884, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776263236999512, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816561937332153, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861233830451965, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882856607437134, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886627793312073, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926826357841492, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930065870285034, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938056468963623, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940770864486694, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943221211433411, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947682023048401, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995431661605835, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958293437957764, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974820613861084, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997687578201294, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980550408363342, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983713030815125, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999047577381134, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.31.mlp": [ + { + "accuracy": 0.9359499216079712, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9373830556869507, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9471551775932312, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9500893950462341, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681574702262878, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9706377387046814, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746962189674377, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836949706077576, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852597713470459, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983837366104126, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859185218811035, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918448328971863, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926727414131165, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954110980033875, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955899715423584, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996233344078064, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976861476898193, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.32.self_attn": [ + { + "accuracy": 0.981233537197113, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982043981552124, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850028157234192, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987699568271637, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990547239780426, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909716248512268, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940959215164185, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944697022438049, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951368570327759, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953779578208923, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954210519790649, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957728385925293, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963272213935852, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966598749160767, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979604482650757, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982084631919861, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984655380249023, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988442063331604, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999336838722229, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.32.mlp": [ + { + "accuracy": 0.9363219141960144, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9377471208572388, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9473070502281189, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9501505494117737, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9683274030685425, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708220362663269, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747730493545532, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838047027587891, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853551983833313, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839245080947876, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860069751739502, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918909668922424, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992731511592865, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954437017440796, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956848621368408, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963238835334778, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978408813476562, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.33.self_attn": [ + { + "accuracy": 0.980019748210907, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807413816452026, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837047457695007, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872443675994873, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899165034294128, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901942014694214, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937924146652222, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940743446350098, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948091506958008, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950181841850281, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949795007705688, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953925609588623, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959647059440613, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963642358779907, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977505803108215, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980506896972656, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998318612575531, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99874347448349, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999242901802063, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.33.mlp": [ + { + "accuracy": 0.9367879033088684, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9382192492485046, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.94762122631073, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9504041075706482, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9685793519020081, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9710199236869812, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749000668525696, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839383363723755, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854692220687866, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840590357780457, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861069917678833, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919593334197998, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927846789360046, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954923391342163, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957032799720764, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963269829750061, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978294968605042, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.34.self_attn": [ + { + "accuracy": 0.9791404008865356, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801942706108093, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832372665405273, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868319630622864, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896460175514221, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899576902389526, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936583638191223, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940474629402161, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946163892745972, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994840681552887, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948614835739136, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953175783157349, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959158897399902, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996293842792511, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977084398269653, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979961514472961, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982557892799377, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986550211906433, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991330504417419, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.34.mlp": [ + { + "accuracy": 0.9370977878570557, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.938556432723999, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9477664232254028, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9504872560501099, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9687469005584717, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711794853210449, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974963366985321, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840289950370789, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855492115020752, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841451048851013, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861893653869629, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920191168785095, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928438663482666, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955458045005798, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957543611526489, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963685274124146, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978995323181152, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.35.self_attn": [ + { + "accuracy": 0.9793671369552612, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980634868144989, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842567443847656, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875925183296204, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899290800094604, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901847839355469, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938464164733887, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942835569381714, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950029850006104, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951663613319397, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952086210250854, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955612421035767, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961711764335632, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964938163757324, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977835416793823, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979638457298279, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982456564903259, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984722137451172, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989574551582336, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.35.mlp": [ + { + "accuracy": 0.9377451539039612, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9391962885856628, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9481552839279175, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9507923126220703, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9690450429916382, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9714760184288025, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751537442207336, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984188973903656, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856953024864197, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984290361404419, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863249659538269, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920863509178162, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929054975509644, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995575487613678, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957502484321594, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963387846946716, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978328347206116, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.36.self_attn": [ + { + "accuracy": 0.974165678024292, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743756055831909, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801942706108093, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833793044090271, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872068166732788, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876880645751953, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929361939430237, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932911396026611, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939282536506653, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941912293434143, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939253330230713, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945407509803772, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953635334968567, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957568049430847, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974581003189087, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977299571037292, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981262683868408, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985705018043518, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991815686225891, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.36.mlp": [ + { + "accuracy": 0.9379904866218567, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9394456148147583, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9482663869857788, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9508625864982605, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9691871404647827, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715903997421265, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97520911693573, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842644333839417, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857505559921265, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843704104423523, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863848686218262, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921400547027588, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992947518825531, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956305027008057, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958345890045166, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964243769645691, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979755878448486, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.37.self_attn": [ + { + "accuracy": 0.9746426343917847, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751870036125183, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799465537071228, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841497540473938, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875713586807251, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878732562065125, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926744699478149, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930937886238098, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993908703327179, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941635727882385, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939675331115723, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944710731506348, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995245099067688, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956148862838745, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973616600036621, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976533651351929, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980480074882507, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998445987701416, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990638494491577, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.37.mlp": [ + { + "accuracy": 0.9385639429092407, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.940009593963623, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9486178159713745, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9511471390724182, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694699048995972, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718564748764038, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753795862197876, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843988418579102, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858676195144653, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845024347305298, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864999651908875, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921872615814209, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929866790771484, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956223964691162, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958266019821167, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963938593864441, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979007244110107, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.38.self_attn": [ + { + "accuracy": 0.9795901775360107, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804625511169434, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837955236434937, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872989654541016, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897749423980713, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900639653205872, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938891530036926, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941232204437256, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948256015777588, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950358867645264, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949997663497925, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953690767288208, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960042834281921, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963437914848328, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978004097938538, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980725049972534, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983618259429932, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987565279006958, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999243974685669, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.38.mlp": [ + { + "accuracy": 0.9384301900863647, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9399006366729736, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9484113454818726, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.950918436050415, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693983793258667, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717985391616821, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752809405326843, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843745231628418, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858508110046387, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844756722450256, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864881634712219, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99220210313797, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930021166801453, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956682920455933, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958592653274536, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964250922203064, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997981607913971, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.39.self_attn": [ + { + "accuracy": 0.9760702848434448, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775062799453735, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812119603157043, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844355583190918, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884986281394958, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888470768928528, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993106484413147, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933972358703613, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941746592521667, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944333434104919, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994265079498291, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947752356529236, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954180121421814, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958494305610657, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975273609161377, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977539777755737, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998189389705658, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984936118125916, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990658760070801, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.39.mlp": [ + { + "accuracy": 0.9386256337165833, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.94008469581604, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9484606981277466, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9509347677230835, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694626331329346, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718942642211914, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753097295761108, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844041466712952, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858697652816772, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845015406608582, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865157604217529, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921905398368835, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929875135421753, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956239461898804, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995820939540863, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963693022727966, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997887909412384, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.40.self_attn": [ + { + "accuracy": 0.9801753163337708, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981099009513855, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984032928943634, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875447154045105, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901061058044434, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904682040214539, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942255020141602, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994509220123291, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950581789016724, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952777624130249, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995255172252655, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955610632896423, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961006045341492, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964563250541687, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978601336479187, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980557560920715, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998429000377655, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986870884895325, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991942048072815, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.40.mlp": [ + { + "accuracy": 0.9382447600364685, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9397351741790771, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9480016827583313, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9504536390304565, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9692959189414978, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717113375663757, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750775098800659, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842763543128967, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857592582702637, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844105839729309, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864184856414795, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921382665634155, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929260611534119, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955759644508362, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958152770996094, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963604807853699, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979093074798584, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.41.self_attn": [ + { + "accuracy": 0.9748270511627197, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762071371078491, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801239967346191, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847546219825745, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878023266792297, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880443811416626, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925763010978699, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928976893424988, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935824275016785, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939430952072144, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939813017845154, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994408130645752, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950937628746033, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995532751083374, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972965121269226, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975578188896179, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979912042617798, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983301758766174, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989895224571228, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.41.mlp": [ + { + "accuracy": 0.9380540251731873, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9395470023155212, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9477434754371643, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9502046704292297, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.969237208366394, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9716219902038574, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749593138694763, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842112064361572, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856977462768555, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843839406967163, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863762259483337, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921252131462097, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992901623249054, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955677390098572, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957366585731506, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962688088417053, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977636337280273, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.42.self_attn": [ + { + "accuracy": 0.9806851148605347, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816950559616089, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847092032432556, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878072142601013, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903959631919861, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908172488212585, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941912293434143, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945164918899536, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951170682907104, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953135251998901, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953287839889526, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957553148269653, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963214993476868, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966014623641968, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979473948478699, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981303215026855, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984419345855713, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987218379974365, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992130398750305, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.42.mlp": [ + { + "accuracy": 0.9371722936630249, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.938726544380188, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9468337297439575, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9493154287338257, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9687827825546265, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9712164402008057, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745301604270935, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839523434638977, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854544401168823, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841383099555969, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861705303192139, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919793605804443, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927709102630615, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954431653022766, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957048296928406, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962378144264221, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977869987487793, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.43.self_attn": [ + { + "accuracy": 0.9701538681983948, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9713791608810425, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9756102561950684, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814963936805725, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854576587677002, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856342077255249, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914855360984802, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918665289878845, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925466179847717, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928730130195618, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927729368209839, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933313727378845, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940761923789978, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945785999298096, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996798574924469, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970707297325134, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977564811706543, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980527758598328, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989070892333984, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.43.mlp": [ + { + "accuracy": 0.9367422461509705, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9382758736610413, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9464203119277954, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9489640593528748, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9685754179954529, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.971007764339447, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743295311927795, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983794629573822, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853248596191406, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840484857559204, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860715270042419, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991936981678009, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927180409431458, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995420515537262, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956493377685547, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996182918548584, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977150559425354, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.44.self_attn": [ + { + "accuracy": 0.9690093994140625, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9706190824508667, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759246110916138, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808601140975952, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851641654968262, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853693246841431, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991560697555542, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919132590293884, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992569088935852, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928901195526123, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926820993423462, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931730031967163, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942017197608948, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946497678756714, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968062043190002, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970336556434631, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976707100868225, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979747533798218, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987433552742004, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.44.mlp": [ + { + "accuracy": 0.9368401169776917, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9383623003959656, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.946504533290863, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9491025805473328, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9685496091842651, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709866046905518, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743472933769226, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837198853492737, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.985267698764801, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840162396430969, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860338568687439, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918889999389648, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926514625549316, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953343868255615, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956356287002563, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961757659912109, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976853132247925, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.45.self_attn": [ + { + "accuracy": 0.9685677289962769, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9701920747756958, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764515161514282, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807252287864685, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848027229309082, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852157831192017, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915295839309692, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918795228004456, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927209615707397, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930016398429871, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928101897239685, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931959509849548, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944538474082947, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948583245277405, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969334602355957, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971445202827454, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976902008056641, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979966282844543, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987844228744507, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.45.mlp": [ + { + "accuracy": 0.9407219886779785, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9421713948249817, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9496870636940002, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9521428942680359, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9704959392547607, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9727914333343506, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758973121643066, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846319556236267, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986102819442749, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984964907169342, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868557453155518, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923187494277954, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929889440536499, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954713582992554, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957993030548096, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996288537979126, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997621476650238, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.46.self_attn": [ + { + "accuracy": 0.9776943922042847, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784016609191895, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826298356056213, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862691760063171, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892516732215881, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895190596580505, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937610030174255, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939894080162048, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945799708366394, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947929978370667, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946375489234924, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950839281082153, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958848357200623, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961767792701721, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977140426635742, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978521466255188, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982840418815613, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984675049781799, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990358948707581, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.46.mlp": [ + { + "accuracy": 0.9473329186439514, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9494473338127136, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9552974104881287, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9573683738708496, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746665358543396, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97672039270401, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979377031326294, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864553213119507, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877625107765198, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870514273643494, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886022806167603, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931972026824951, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935863018035889, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956056475639343, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960218071937561, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964144229888916, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973106384277344, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.47.self_attn": [ + { + "accuracy": 0.9858475923538208, + "total_bits": 319709184, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864258170127869, + "total_bits": 328032256, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883655905723572, + "total_bits": 334976000, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914183020591736, + "total_bits": 400508928, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931609034538269, + "total_bits": 474230784, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932454824447632, + "total_bits": 474430464, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996307909488678, + "total_bits": 608710656, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963991641998291, + "total_bits": 608975872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966539144515991, + "total_bits": 613971968, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967660307884216, + "total_bits": 622903296, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966647624969482, + "total_bits": 625425408, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968550801277161, + "total_bits": 629307392, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997254490852356, + "total_bits": 636313600, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974396824836731, + "total_bits": 645775360, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984261393547058, + "total_bits": 783691776, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998417854309082, + "total_bits": 796770304, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988263249397278, + "total_bits": 910700544, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987491369247437, + "total_bits": 941670400, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991878867149353, + "total_bits": 1212690432, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.47.mlp": [ + { + "accuracy": 0.955906093120575, + "total_bits": 1209168864, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9579992294311523, + "total_bits": 1252848608, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9622502326965332, + "total_bits": 1399238912, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9639694094657898, + "total_bits": 1570418944, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797136187553406, + "total_bits": 1770569040, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810196757316589, + "total_bits": 1818255616, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830011129379272, + "total_bits": 1955300688, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988390326499939, + "total_bits": 2235687248, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888335466384888, + "total_bits": 2268651776, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890041351318359, + "total_bits": 2302459216, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896001219749451, + "total_bits": 2350145792, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924117922782898, + "total_bits": 2834349392, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923677444458008, + "total_bits": 2882035968, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936841726303101, + "total_bits": 3282550096, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939578175544739, + "total_bits": 3396745280, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941152334213257, + "total_bits": 3702929472, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944548606872559, + "total_bits": 4345444416, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.norm.norm": null, + "lm_head.linear": null + }, + "last_module_idx": 98 } \ No newline at end of file