diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,93655 @@ +{ + "measurement": { + "model.layers.0.self_attn": [ + { + "accuracy": 0.8205160341764751, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8382251764598646, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8512430191040039, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9126273393630981, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9155572213624653, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9160806568045365, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9482073470165855, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9485845879504555, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9522175161462081, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9556724178163629, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9569694054754156, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600463760526556, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624065380347402, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9655057285961351, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792067973237288, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826642118002239, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863714811049009, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989476317637845, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964407215777197, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.0.mlp": [ + { + "accuracy": 0.9095033156244379, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.910369735015066, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9396635042993646, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9559729569836667, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9521158212109616, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9562686085700989, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9704878957648027, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726632927593432, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.976212761904064, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755798703745792, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788674947462583, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874917825585917, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894219746715144, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927965535929328, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938002518917385, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973062008227173, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978538541809508, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.9840831301714245, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870253401367288, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883612159051394, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929842407766142, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930732854102787, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932626556409033, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948568285295838, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995087959264454, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955146434275728, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956882090160721, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965894273237178, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968436019201028, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99686421434346, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971388670566835, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982508468000513, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985391706424324, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986323943655742, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991733716114571, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995710453214613, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9773361808375308, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826083308772037, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824539030853071, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825550851068998, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922346127660651, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931521580407494, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931329190731049, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942392938231167, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947406704488554, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960627171554064, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966895584213106, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979086902580763, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982218126717367, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982182138452405, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987364305477393, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987364884858069, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990344071074536, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9654159796865363, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697982480651454, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9713567683571264, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833959827297613, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836229697654122, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838403149654991, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902815889371069, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905358623517188, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912078600180777, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917025417089462, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917703161114141, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923235703455774, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927750159251062, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933563023805618, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960006695044669, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996650803638132, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974375276973373, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998185120326908, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992856657818744, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.9588846030988192, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9634204224536294, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638011643761083, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9671529500108016, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826733328794178, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844156535048234, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845375183381533, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869174580825003, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885163307189941, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912570432612771, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925115947660647, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954599691064734, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961837181135228, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963579703318445, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978266538758027, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980238725088144, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998554264930518, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9496551407010931, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9553866449155306, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9577856565776625, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749248968927484, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762458487560874, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764988234168605, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858639538288116, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861223368268264, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870811841989818, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878504488028979, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880362764785164, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888276387202112, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894207088570846, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902517764191878, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942022162048441, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951173572948104, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963072012913855, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973443417172683, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990076434455419, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.9425169292249178, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9458780539663214, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9500507994701988, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9562249026800457, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717723476259332, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742757270210668, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775700851490623, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811280150162546, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834792912006378, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855498044114364, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875998089188024, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992558700473685, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937427522320497, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948567869631868, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996441596432736, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997461410141305, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982630686932489, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9446369033110769, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9491736010501259, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9526251962310389, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9719572192744205, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9730644194703353, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733252682183918, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845850342198422, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848616609447881, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859112140379453, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867511328897978, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863957198042619, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873160382634715, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880955987855008, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890375874544445, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934563577959412, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945037725724672, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959666250567687, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970764460924425, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989206725241322, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.921914019082722, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9389589836722926, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9389830576746088, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9393739261125263, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912385956237191, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891816934472636, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896647169401771, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938401089687097, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960256328708247, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965079207169382, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966972791834882, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976293572077626, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979346508258268, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976963402801439, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988503481604551, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989953953772783, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999194534152354, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.9938844948222763, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942421819034376, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946444736499536, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964802174975997, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969611960022073, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970053071646314, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981600495153352, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982133164609733, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983497063972449, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984406424000075, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984731434011146, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985817347310091, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986390336171577, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987545655550141, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992589465783615, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993776695704774, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995201735904342, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996697119633225, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998707243756048, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.989808451188238, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901199082010671, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915129354125575, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922887588802137, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949157085073622, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953254774997109, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961558756859679, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968233867303321, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972914733776921, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973210706130454, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976937949265304, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985346595119489, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987007914797256, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990184470815094, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991730035919892, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993468272548757, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994990046773302, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.9939366394751951, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943358804050245, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948059591023546, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996442397958354, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969600119480961, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970201183306543, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981466431758905, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982214232808665, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983769835610139, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984805847665197, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984812230656022, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985908184592661, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986742445708889, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987848984371674, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992687436997106, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993900809633104, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995186053412525, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996723963290846, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998699094806063, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9889652313370454, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893288839804498, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906646193642366, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913397690183238, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945782226951498, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951522781660682, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995932463752596, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969727794982886, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972741978341028, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972263400099779, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976334605169924, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985032393351981, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987153977547821, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990190921449348, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992075622767994, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993704404485854, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995589755279454, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.9934064361609911, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937883133166715, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944011788619193, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960321858525276, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966950753801748, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967517358692068, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980177114668646, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980864334655436, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982702790906555, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983586892485619, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983516292352426, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984759590343425, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985770341008902, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987001718070946, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992156149819493, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993502912846836, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994786216044113, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996605916684004, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998582565029593, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.9871237340726351, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875701399225938, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893268550697126, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900096051002804, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935677075072339, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941273182630539, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949358497795305, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964790395215938, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967998103483727, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967154633057745, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971619662093488, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998317428129284, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985797375832733, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990364146467886, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991511153547388, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993444698323545, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997283651885626, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.9935303091218597, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939844941622332, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945101169379134, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995996645014537, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967612284970911, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968539999895975, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978967938376101, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980233419490488, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981626305533083, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982738273316308, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983783332925094, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985055099976691, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985892960899755, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987156969544134, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992228064097857, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993530376569221, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994566114619374, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996475505789644, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998510884684756, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.9839362470727218, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984474025274578, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869786900909323, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878012208562148, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918909653236991, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926332454932364, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936805681178444, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995661836313574, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996081364782233, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958502767901671, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964167185519871, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978758688820036, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982075269677138, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988105078472903, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989184830337763, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999161925853083, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996719224574534, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.9914312150917555, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919502954734, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926648296807942, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948772803733223, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956398300434414, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957538651008355, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973607933834979, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975059406929895, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977323991295538, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997852767768659, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978309383517817, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979993793133058, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981250602163767, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983094620861506, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989625690014738, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991458358341142, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993119987806207, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995503277566872, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998141785863003, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9811103626301414, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817124824774893, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849435781177721, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859562029964045, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904333219716424, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912604689598083, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926513096219615, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949822268987957, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954692023365121, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951153193649492, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957561139997683, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975086115300655, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978815156378245, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986432045698166, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987303035235718, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990323911371984, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996394315293353, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9909576420721254, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914552216467104, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922125237552744, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994080159224962, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953231137049826, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954652029432749, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971728883683681, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997291436320857, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976199467323328, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977327873440165, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977410547435284, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979257616949709, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980363461532091, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982255881554202, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989148935205058, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991203744552637, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992603013095888, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995342948915142, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999796340732198, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.9797802347885934, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804333448410034, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836708307266235, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846921814115424, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897195469391974, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906157168902849, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919988497307426, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945948629787094, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951114831002135, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947525803980074, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954414759811602, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997324613952323, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977259486913681, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985353719247015, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986381522919002, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989343584564171, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996049759027205, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9890047358839136, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898373437555212, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907129602996927, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933007492830879, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945448225266055, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946905601965753, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964249847750914, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966147761595877, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969697969132348, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971771224548942, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972842057284556, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975309713106406, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976405152364781, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978996303520704, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987064907817464, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998945016511961, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990870236958328, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994178138379204, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999747244347083, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.9805398442243275, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811315207104934, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842541045264194, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98520827293396, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901886409834811, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99100550930751, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923183070985895, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948985396247161, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953555594149389, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995008358437764, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956494373710532, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974613997497057, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978341748447794, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986220775662284, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987096260056684, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998974008191573, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99963494929436, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9891094494807092, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898344701842258, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990775993780086, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934360757470131, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945711238603843, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946677112265637, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967307936596242, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968311384712395, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970758872988977, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972625639485685, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973013985313868, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974993000689306, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976948689865438, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978908094528475, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987385478851042, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989442948840166, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991626713032785, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994226012280897, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997731840336009, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.9787190399671856, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793464729660436, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982686067882337, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837120034192738, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892294963723735, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901162166344492, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915496205028734, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944309556954786, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994933751460753, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994517583596079, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995219533772845, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972131975779408, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976209121707239, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985012464029225, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985797938547636, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988659705574575, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996004887170306, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9887357401220422, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894793119869734, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904238456173947, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931374545160093, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942925513574952, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944421503888933, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964443888319167, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966438326396441, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996882637864665, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970285529760938, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971612525221548, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973661548605091, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975664813659693, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997756963497714, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986451327016479, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988788746689495, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990761409464636, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993734315135762, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999751727979042, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.9771768764445656, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778563991973275, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814961317338442, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825909059298666, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884516541895113, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894299397343084, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909588827898628, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940217858866641, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945643952018336, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941081718394631, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948724876893195, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969952590763569, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974433271900603, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983775755880695, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984646326813259, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987761106734213, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995703117333745, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9881172172333065, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988774014930976, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897396807607851, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925015600104081, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939815476536751, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994102626254684, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962843801630171, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964186098230513, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968088400599203, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969767341880422, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970476150904831, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972626127694783, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974563035525774, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997665612125083, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986078349971458, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988337715989665, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990758939008964, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999365508360298, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997460889924121, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.9765870680934504, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772843216594896, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981147217123132, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822914255292792, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881762854362789, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891841991951591, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907823871625098, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938757407822107, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944372259472546, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939676123230081, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947549539961313, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969244997360205, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973855983269843, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983405094397696, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998429733965742, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998761661546795, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995602887417925, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.9869644539920908, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987599663044277, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887897811437908, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916063680460578, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935452232235357, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936702745525461, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962609002464696, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964223021739408, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966967180371284, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968848379426881, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967964948400071, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970467586658502, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997321995465379, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975196877984625, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998533159690468, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987656255497744, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990353430376241, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993368135275025, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997437578184825, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.976587198282543, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773323065356204, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811794428448928, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823110746709924, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882064563663382, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892477314723166, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908229418491062, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938986344557059, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944545124706469, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939722407021021, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947809910303668, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996912750366487, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973853753977701, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983123899682572, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984227247340115, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987534223810622, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995583119361025, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9879505053946847, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886576760756342, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896817309291739, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925277719372197, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939654838097723, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940525720778265, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963592577137446, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964690584885446, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967979428015257, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970453356049562, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969956631723204, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972576532316836, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974658387271982, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976725178329569, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985848775035456, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988301641454822, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990679651106659, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999363553465197, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997480222465176, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.9771893667547327, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778833357911361, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816872257935373, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828216715862876, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884525608075293, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894648233526632, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910515107606587, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939910122438481, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945549980590218, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941031548537707, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948834180831909, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969916663279659, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974469911111029, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983664316762435, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998462543871842, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987942795024106, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995599367018593, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.9873134195804596, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880595003303728, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888370327259365, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914186643926721, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936113765365199, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937534928321838, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960899909860209, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962698005531964, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965152344421336, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968453016328184, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968889598783693, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971749062993025, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973568161459345, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997574120563896, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998513595348126, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987794657875049, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990102623246218, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993311818689108, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997338245279694, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.9777198182909113, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783872052242881, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820239198835272, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831105078521528, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886947399691531, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896869941761619, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991217188144985, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941213589749838, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946714965136427, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942174604848811, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949673433837137, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970471780551108, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974932041215269, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983994354935068, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984887379564737, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988079340638298, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995716299959704, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.9858573486930445, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865915006712863, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876608197626314, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903309721695749, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930795784059324, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932061355364951, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957292570095313, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959068196384531, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961912173189615, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965276337767902, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965861098546731, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968524655621303, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970832861175662, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973448703163549, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983810416765904, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986636360224924, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989115939916748, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992520068527052, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997086462956902, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.9778035173290655, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784905251703764, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821505311288332, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832403910787482, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887228388535348, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897272571137077, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912623770927128, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994119999047957, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99467927923328, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942240864038467, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994999272650794, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970489514893607, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975043443080626, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983954829605002, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984905911903632, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988101273775101, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995713404036666, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9876478910446167, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881831972222579, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891791955420846, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916128806377712, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936006327992991, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936314954569465, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959491942273943, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959929605063639, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965725188192568, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996694866763918, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968986473977566, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970744742374671, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972692816273162, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975157338929804, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984941480583266, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987553952163771, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989732193123353, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993038941664916, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997246353151766, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.9775467750273252, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782602473309165, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819913271226381, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983110708625693, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885486936882922, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896013728882137, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911573042995051, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940205839903731, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945977789006735, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941296695094359, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949324311394441, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996995744932639, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974677760742212, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983666912701569, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984622382019696, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987919303147417, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995644762621898, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.9861964614767778, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869937590862575, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879886633471439, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908703393057773, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993047916575482, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931708464497014, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995670858574541, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958049525555811, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961992462998942, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962857110719932, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965740724613792, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967909190607699, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970146223510566, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972583500570372, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998359925946907, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986404951073622, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988754743612126, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992159780997195, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996969074893155, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.977756541026266, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785211870544835, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821950401130476, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833029179196608, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886652788049296, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897038019017169, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912558911662352, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940718004578039, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946267644041463, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941912152265248, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949777318458808, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970284439623356, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974897427386359, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983742598836359, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984765860595202, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988061178867754, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995600240010964, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9845405644492099, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853915117288891, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859435432835629, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882537628474989, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926778878036299, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928174826659655, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958113169199542, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995880634769013, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963199088447973, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996478934037058, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964236091626318, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967426494940331, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969943363807703, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972232460583511, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983754993269318, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986175625143867, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989499861472532, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992604617421564, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997155439892882, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.9783195116017994, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790565293086203, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827037234055368, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837970216023294, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889427541117919, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899497032165527, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914792242803072, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942018119128126, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947537799414835, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99432717224485, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950915512285734, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970961405258429, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975447601785785, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984132206361545, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985136928919115, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988418099911589, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995762200437879, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.22.self_attn": [ + { + "accuracy": 0.9857536695505443, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864574796275088, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874055315005151, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904895346415671, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926637865995106, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928007619945627, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955958751471419, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957778296972576, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961479121917173, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964097487299066, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963639323648653, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966351538896561, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968722464614793, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971354950807596, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982457655040842, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985586335196307, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988521370840701, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991984463934052, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996964624641758, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.22.mlp": [ + { + "accuracy": 0.9700834625645688, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9729651212692261, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767675462522005, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.977772079016033, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876372312244616, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892939046809548, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909113237732335, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938066331179518, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99449102423693, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940648004412651, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947938213222906, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968826257084545, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973936390719915, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981951133200997, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984058155433128, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987618373216767, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994765358829969, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.23.self_attn": [ + { + "accuracy": 0.9847077962599302, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856971753271002, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867753715891587, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893723192967867, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925412606251868, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927024715825131, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956861844188288, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958723626638714, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962057271286061, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964591876456612, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962236414614477, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965591960047421, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968775677445688, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971271220006441, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982558561390952, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985870671899695, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988693000846788, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992575482220242, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996905115089918, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.23.mlp": [ + { + "accuracy": 0.9788671731948853, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795900144075093, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982930843767367, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839415691400829, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891397858920851, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902190809187136, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916133653176459, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942601201565642, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948401851089377, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943864082819537, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951917093835378, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971035105225287, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975641681567619, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983852583130723, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985201888178524, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998813921683713, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995636860104767, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.24.self_attn": [ + { + "accuracy": 0.9857935623118752, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868643777935129, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988079622387886, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908676476855027, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929213327796835, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930734563814966, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954177588224411, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955730053939318, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960684682193556, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963181144312808, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965109334964501, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996787950004402, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969648254152975, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972448966612941, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983378754634606, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985827199722591, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988570952493894, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991604123068484, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996793064239778, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.24.mlp": [ + { + "accuracy": 0.9795229811417429, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980158562722959, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834205116096296, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844015130871221, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895533433085993, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904917055054715, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918759708341799, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945361010338131, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950454823280636, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946356047140924, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953578953680239, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972407804115823, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976782116450762, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984796029564581, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985956448669496, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988891699008251, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995966490946318, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.25.self_attn": [ + { + "accuracy": 0.9865994657340803, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871421448494259, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880553405535849, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906364044076518, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929804417647814, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932038611487338, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955198686373862, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958089723398811, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961904248124674, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963433617039731, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965248672585738, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968135970595636, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969289116561413, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972153502075296, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982480563615498, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985973731075463, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998781904382141, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999198488498989, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996501034566838, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.25.mlp": [ + { + "accuracy": 0.9785807854250858, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792601858314715, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826406067923495, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836777150630951, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890426750245848, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900302471298921, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914952195004413, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942720473596924, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948032666978083, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943794839476284, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951316737814954, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971224628388882, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997565015170135, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984299207204267, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998526886987843, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988380141164127, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995754611139235, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.26.self_attn": [ + { + "accuracy": 0.9867341110580846, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872617996052692, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881531541284762, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990660736435338, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932060571093309, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933253644328368, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956584561027979, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957940202010306, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960956585250402, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961329298584085, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966367263542978, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968700248159861, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997008999319453, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997206942031258, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983381297635404, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985682013395586, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988622990878004, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991762415554962, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996862531835703, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.26.mlp": [ + { + "accuracy": 0.977714722093783, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784590994056902, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820274491059152, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831448009139613, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885139308477703, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895924274858675, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911388135270068, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939335600325936, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945287241747505, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940866655425021, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948954660641519, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996956601150726, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974398005165552, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983047650832879, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984288350924065, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998753974704366, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994828098110462, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.27.self_attn": [ + { + "accuracy": 0.9842598014756253, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851145038479253, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864798345063862, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895626941793844, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914963331661726, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916428633426365, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942891444814833, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944409382970709, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951146692037582, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954699254349658, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958296214279375, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960977313549895, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963880749909502, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966500185822186, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997994231746385, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983331278750771, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998535003120962, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991009335376715, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996128464852901, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.27.mlp": [ + { + "accuracy": 0.9759427591374046, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.976779569136469, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805708198170913, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817993578157926, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873432705276891, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888757443741748, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905616084211751, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934269327082133, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941181167960167, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936765742145086, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945483737085995, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967388075433279, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972663098259976, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981631352321098, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983189270684594, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998675435683445, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994357174850608, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.28.self_attn": [ + { + "accuracy": 0.9834861331864407, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984231195951763, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857337459137565, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888466502490797, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914105809048602, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915918888230073, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944238619584786, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946410350109401, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951083958148956, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953860899335459, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957282445148418, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959849574063954, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962836947096022, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965668995129434, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997944533236717, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982556194851273, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998492091207912, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999061396639598, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995800014095086, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.28.mlp": [ + { + "accuracy": 0.9749058704627188, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758688823172921, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798659243081745, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812051842087194, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871195711587605, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883125432227787, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900650334985632, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930714713899713, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937622147171121, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933992051764539, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942991804135474, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966257853727591, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971488720100177, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998096413910389, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982883704728202, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998670470165579, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999481023877467, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.29.self_attn": [ + { + "accuracy": 0.9815504237225181, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982386882367887, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838705486372897, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874235446515837, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903814800475773, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906137993461207, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937671140620583, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940738807383337, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943243833748918, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945565638573546, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949922098925239, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953544739829866, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955781878609407, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959244994740737, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997426075762824, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979097196146062, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980158054907071, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998925470011799, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994649435148427, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.29.mlp": [ + { + "accuracy": 0.9734090660747728, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743730551318118, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786028814943213, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800157531311637, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863466683186983, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875897373023786, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989462454852305, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927148324878592, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934178606459969, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930058811840258, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939469475495187, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964225135351482, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969740780560594, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980077753333669, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981802974484468, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985844022349307, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994576154766899, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.30.self_attn": [ + { + "accuracy": 0.9808014646956795, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816694541981346, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835026734753659, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874522270340669, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900813134093034, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903438491256613, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938427335337588, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941351519603479, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944203166585219, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947039626146618, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948856701192103, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952999467128202, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956540500647143, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960086129997906, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975776580211363, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997967501612086, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982611915390742, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989332201841631, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995390983102354, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.30.mlp": [ + { + "accuracy": 0.9720498009731895, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9730267650202701, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9774947088015707, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789884404132241, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856842834698526, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869608471268102, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889480495139172, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923523335080398, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930931808132875, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926788352037731, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936574701415865, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962622993870786, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968316592276096, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979192565538382, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981048938475157, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985262539825941, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994333387401543, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.31.self_attn": [ + { + "accuracy": 0.980685221521478, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812213166763908, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983482721604799, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986929714679718, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899586501874422, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901922486330333, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936260204566153, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993744621151372, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943925467760939, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948511543242555, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951866382831022, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995452366377178, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958160825465855, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962913303783065, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977591296559886, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981207492712297, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984076689732703, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989994729035779, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995798564172889, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.31.mlp": [ + { + "accuracy": 0.9698706237893355, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708822777396754, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757348597049713, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773728863189095, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845825480787378, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859340598708705, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98809914290905, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917214301071668, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925203127296347, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921226517150277, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993163210780997, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959826979197954, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965848256098596, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977458223308387, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979678029684644, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984247568798693, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993822597653458, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.32.self_attn": [ + { + "accuracy": 0.9772631102486661, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808053954651481, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98248469672705, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860974001257043, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989284297353343, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898072418413664, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929660339104501, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934733847254201, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994040406848255, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944970094059643, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946922408907037, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951539670950488, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954694203640285, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958294555544853, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974025727102631, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978895395209915, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980201491791951, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989220186283714, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994679565394395, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.32.mlp": [ + { + "accuracy": 0.9673337246242323, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9684021378818312, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736863421766382, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754751042315835, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832720552620134, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847085319067302, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870759091879192, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991069357646139, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919334720624121, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991455119691397, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925695657730103, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956245567453536, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962921719017782, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975590827433687, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977880557508845, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982835870040091, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993394686791458, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.33.self_attn": [ + { + "accuracy": 0.9785026547155882, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792770461032265, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817060828208923, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853611384567461, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98892159132581, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890492299669668, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928716381913737, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932588166312167, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939837706716437, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943891222539701, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945882605878931, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949026162687101, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954223648497933, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958449606048433, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975134212719766, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978906598530317, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982535790063833, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989227387858065, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995337339902395, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.33.mlp": [ + { + "accuracy": 0.9626807382232264, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.963894109976919, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699012197946247, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.971996683823435, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980839657156091, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825365810017836, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852373035330522, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896654000407771, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906748313652841, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902111182087346, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915107716070978, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950042488543611, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957595973422653, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971789584348076, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974759487729323, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980444267000023, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992196871163813, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.34.self_attn": [ + { + "accuracy": 0.97520417759293, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764219459734464, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779126487280193, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805573130908766, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863274081757194, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866256792294351, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922110469717729, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992805825252282, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935788574971651, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994173018948028, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994011577022703, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946128992657912, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953207934373304, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957466372533849, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974367395043373, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979074226790353, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981345540206683, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989359366070283, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995054514392426, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.34.mlp": [ + { + "accuracy": 0.9601811703882719, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9614555365160892, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676796950792012, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9698883138204876, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795821277718795, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813699737975472, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841738280497099, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889943756555256, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990077156769602, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895677778281664, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909506706815017, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946777569620233, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954832439359865, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970060051663926, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973096078947971, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978975920300734, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991705343127251, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.35.self_attn": [ + { + "accuracy": 0.972711908189874, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755962867485849, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776274922646975, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824149263532538, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986643080648623, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868550951543608, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917479031964352, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921207561304695, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927977205891358, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930723678124579, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934993719584063, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939066524568357, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944272562861443, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949444206921678, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996960673285158, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974270945316867, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978710720805746, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986548320831437, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994206354512196, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.35.mlp": [ + { + "accuracy": 0.956941893226222, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9583078811043187, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649922188959623, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9674022166352523, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778625196532199, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798279985001213, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828848227074272, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880810058430621, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892384307949167, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886893912365562, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901973652212244, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942153464806708, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951024882887539, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967385269701481, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970723214118105, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977033628444922, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999084358838828, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.36.self_attn": [ + { + "accuracy": 0.9731209717298809, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745212448270697, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775138842432123, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811692833900452, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862683933032187, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869188705557271, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913932649712813, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917483776807785, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926746934652328, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993099423615556, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932420089056617, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937970501027609, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945201701239536, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951188164322, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969216408698183, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975679632472364, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977107857795138, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987310808348028, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993785144271035, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.36.mlp": [ + { + "accuracy": 0.9534331967956141, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9548713282534951, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9619856978717604, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.964536870780744, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760850087592476, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781276552300704, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814143102420004, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871838226130134, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884080698615626, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878005730478387, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893904524414163, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937870545606864, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994710628923617, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965280906150216, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968680043361688, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975456752275166, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990409580304435, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.37.self_attn": [ + { + "accuracy": 0.9732412884109899, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747504262547744, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772215171864158, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813028291652077, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864578137272283, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870589602934686, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913507801921744, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991817693961294, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923505406630667, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928251159818549, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934230031151521, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940120087642419, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994515046085182, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995042738161589, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969459427030463, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975272958215914, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977369843737075, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986298060730884, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993815951931634, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.37.mlp": [ + { + "accuracy": 0.9512007299222445, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9526898296255815, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9599853189367997, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9625108649856166, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974962199989118, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770785256435996, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804405538659346, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866757800704554, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879338317795804, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872417559749201, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888748950079868, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935035176183048, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944537274147335, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963955867447352, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967135632116544, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973920805281714, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990005912749391, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.38.self_attn": [ + { + "accuracy": 0.976010168853559, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770550978811163, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792156752787138, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831228381709048, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875860261289697, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880192836648539, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922785719758586, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926575406601554, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934413440917668, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937435761094093, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939013680345133, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943999059890446, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948517342931346, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953063761717394, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971031628941235, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976831260872515, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979086008511091, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987395628306427, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994325721146244, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.38.mlp": [ + { + "accuracy": 0.9492053515032718, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9507104691706205, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9581474379489296, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.960700511932373, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739867182154405, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9761195621992412, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795497012765784, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862319827079773, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875126057549527, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867478514972486, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884237631371147, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932586942848406, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942276211161363, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962954878022796, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965869752984298, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997286676772331, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989882145861262, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.39.self_attn": [ + { + "accuracy": 0.9736984952499992, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.975146555586865, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782643302490837, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825349214829897, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866786159967121, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986991043153562, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916786041698957, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921274373405858, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929222056740209, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933792454631705, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934763249598051, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940437329442877, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945884680277423, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951503888556832, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969819543000898, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975712683640028, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978332180333765, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986791984227142, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999403314321841, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.39.mlp": [ + { + "accuracy": 0.9464477865319503, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9479977394405165, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9555837606128893, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9581883267352456, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725200251529091, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747746931879144, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978284555046182, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854474506880108, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868220953564895, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859987795352936, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877725620018808, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928766645883259, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939038349609626, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960816059457628, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963887028003994, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970953754688564, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998923280423409, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.40.self_attn": [ + { + "accuracy": 0.9745384470412606, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9761476077531513, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792258723786003, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825061889071214, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873496419505069, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878273629828503, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991121033304616, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919452682921761, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992895624355266, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932340250203484, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993581738911177, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943445599392841, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946887010806486, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954651272610614, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970453757988779, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976935890552244, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976475027046705, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987344391840068, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993633378395125, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.40.mlp": [ + { + "accuracy": 0.9450207760459498, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9465845258612382, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9541723853663394, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9567647105769107, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717792021600824, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9740987090688003, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775916008572829, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850769497846302, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864877556499682, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.985617165502749, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98743552832227, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926806437341791, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937407378303377, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959838331529969, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962932592944095, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969864019045704, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998904473883541, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.41.self_attn": [ + { + "accuracy": 0.9743090366062365, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9756225096551996, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796415049778787, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828858767685137, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869887271994039, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874191911597001, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991455750245797, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919901952931756, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930566517930282, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933047373043863, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937493295261735, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942903577497131, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947985610679576, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954828957193776, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971454712121111, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997743809693738, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977586528188304, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987930528035289, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993752829828545, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.41.mlp": [ + { + "accuracy": 0.9433609121724179, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9449470796083149, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9524982602972734, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9550456812507228, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708989764514723, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732986531759563, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767832912896809, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846977105266169, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861273232259249, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851715172591963, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870479161802092, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924456477165222, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993548677155846, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958788195722982, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961618930101395, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968447502898542, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988796689400548, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.42.self_attn": [ + { + "accuracy": 0.9754306015215422, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.976837118989543, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794494760663885, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9822983302568135, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873351352779489, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878120477262297, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908987644471621, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914188949685347, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925905779788369, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931964976222891, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939574748277664, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947290467588525, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947605125213924, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955622122475976, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970953209619773, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977729122497534, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976239073041239, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987522549927235, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993567441247011, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.42.mlp": [ + { + "accuracy": 0.9414088851527164, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9430647900230006, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9506304922856783, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9532081327940288, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9698917552044517, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724137061520627, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758825208011427, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841210481367613, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856288542872981, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846376337503132, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865997872854534, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921677049837614, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993320674488419, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957092325938376, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960174560546875, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967002166729224, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988270520575737, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.43.self_attn": [ + { + "accuracy": 0.9707527788061845, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731890120004353, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759941634378935, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801796470817766, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984829057204096, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.985394327264083, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894812036501733, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899618155077884, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910995246548402, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915963160364252, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924106668484839, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933279462550816, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937245704625782, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944470360090858, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964385930644838, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971381082738701, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971468597650528, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984001285935703, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992254347001251, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.43.mlp": [ + { + "accuracy": 0.9387011402531674, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9404389795504118, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9481711136667352, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9508628719731381, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9684423804283142, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711272873376545, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746917862641183, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832871822934401, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848756821531999, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838832773660359, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859700202941895, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991779097600987, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929998807216945, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954707951922166, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958178138262347, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965167767123172, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987577103862637, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.44.self_attn": [ + { + "accuracy": 0.9686325663014462, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97027220537788, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738342856106005, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978800787737495, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983345555631738, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839776675952109, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877029743633772, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884634998283888, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901249824385894, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907395988702774, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916205272862786, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928690516634991, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925335049629211, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938889628178195, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957446906911699, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969143753773287, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964705874261103, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982231882841963, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990431025232139, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.44.mlp": [ + { + "accuracy": 0.9301851799613551, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9321154544228002, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9409458072561967, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9441493247684679, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638101050728246, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9669096563991747, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711249815790277, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806312306931144, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825207089122973, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814850593868055, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838791866051523, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905237116311726, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919404661969135, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947380674512762, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951689651137904, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959905669093132, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984602461519995, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.45.self_attn": [ + { + "accuracy": 0.9647209393350702, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9686925505336962, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.972458729618474, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.977437044444837, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828655547217319, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832001877458472, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870756080276087, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874085031057659, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889237700324309, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897446702969702, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914505254281195, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923713764077738, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924768883931009, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932741169866762, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958508634253552, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966582750019274, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966517741742887, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980500616917485, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999064067672742, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.45.mlp": [ + { + "accuracy": 0.9250534333680805, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9271924119246633, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9365847048006559, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9400480295482435, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9610823173271983, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9645889966111434, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9690709866975483, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791362144445118, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812049740239194, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799773551915821, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826698946325403, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897509170205969, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913290707688582, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942508959456494, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947933342896009, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957018318144899, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983682676561569, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.46.self_attn": [ + { + "accuracy": 0.9585064994661432, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9636571532801578, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679578636821947, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736893631910023, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980704687143627, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815211233339811, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861807634955958, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871155368654352, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882552192399376, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891850760108546, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906153467140699, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913877115437859, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991755494161656, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924944090215784, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953290271131616, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962935961390796, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963289138517881, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978400619798585, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989918850754437, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.46.mlp": [ + { + "accuracy": 0.9047728902415225, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9066393124429803, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9131091895856356, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9157525238237882, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9501386598536843, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9575342190893072, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9608277490264491, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9640985821422777, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693544695251867, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736501198065909, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979011036847767, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875761748928773, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900136942926206, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907911443396619, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942673338871253, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950365239852353, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970375534735227, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.47.self_attn": [ + { + "accuracy": 0.976994442312341, + "total_bits": 133692416, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782109354671679, + "total_bits": 137624576, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816309872426485, + "total_bits": 141915264, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849149176948949, + "total_bits": 167308544, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886288705625033, + "total_bits": 197778944, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886727929115295, + "total_bits": 197944704, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912535673693607, + "total_bits": 254402048, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912506192922592, + "total_bits": 254567808, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918025230106554, + "total_bits": 256707328, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922280029246682, + "total_bits": 260323328, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939179251852789, + "total_bits": 260859264, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944445392803142, + "total_bits": 262998784, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944585314706752, + "total_bits": 267385856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949226473507128, + "total_bits": 271333376, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967932230547855, + "total_bits": 328326656, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971759146765659, + "total_bits": 334247936, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972815341071078, + "total_bits": 380231168, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985499657494458, + "total_bits": 396638208, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991998686210105, + "total_bits": 506060288, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.47.mlp": [ + { + "accuracy": 0.9226503497675845, + "total_bits": 474871952, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9252785632484838, + "total_bits": 492566672, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9326919066278558, + "total_bits": 549465856, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9356925173809654, + "total_bits": 616640256, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.960615452967192, + "total_bits": 694777456, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9642153758751719, + "total_bits": 714362112, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9677367775063765, + "total_bits": 768025456, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784141684833326, + "total_bits": 878169328, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804728925228119, + "total_bits": 891112704, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796509774107682, + "total_bits": 903509616, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823856338074333, + "total_bits": 923094272, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893900916764611, + "total_bits": 1112241776, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909586616252598, + "total_bits": 1131826432, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936608107466447, + "total_bits": 1288992368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945147127697342, + "total_bits": 1332028224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952938850773009, + "total_bits": 1451303744, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977275546836225, + "total_bits": 1706107712, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.norm.norm": null, + "lm_head.linear": null + }, + "last_module_idx": 98 +} \ No newline at end of file