{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T19:51:11.362685Z","iopub.status.busy":"2024-10-21T19:51:11.362279Z","iopub.status.idle":"2024-10-21T20:01:23.251497Z","shell.execute_reply":"2024-10-21T20:01:23.250324Z","shell.execute_reply.started":"2024-10-21T19:51:11.362647Z"},"trusted":true},"outputs":[],"source":["%cd /kaggle/working\n","!git clone --depth=1 https://github.com/ggerganov/llama.cpp.git\n","%cd /kaggle/working/llama.cpp\n","!sed -i 's|MK_LDFLAGS += -lcuda|MK_LDFLAGS += -L/usr/local/nvidia/lib64 -lcuda|' Makefile\n","!LLAMA_CUDA=1 conda run -n base make -j > /dev/null"]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T15:49:55.214707Z","iopub.status.busy":"2024-10-21T15:49:55.214216Z","iopub.status.idle":"2024-10-21T15:57:54.086080Z","shell.execute_reply":"2024-10-21T15:57:54.084916Z","shell.execute_reply.started":"2024-10-21T15:49:55.214666Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["/kaggle/working\n","main: build = 1 (f594bc8)\n","main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n","main: quantizing '/kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf' to 'llama-3.1-8b-chat-math-teacher-Q4_K_M.gguf' as Q4_K_M\n","llama_model_loader: loaded meta data with 29 key-value pairs and 292 tensors from /kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv 0: general.architecture str = llama\n","llama_model_loader: - kv 1: general.type str = model\n","llama_model_loader: - kv 2: general.name str = Llama 3.1 8b Chat Math Teacher\n","llama_model_loader: - kv 3: general.finetune str = chat-math-teacher\n","llama_model_loader: - kv 4: general.basename str = llama-3.1\n","llama_model_loader: - kv 5: general.size_label str = 8B\n","llama_model_loader: - kv 6: llama.block_count u32 = 32\n","llama_model_loader: - kv 7: llama.context_length u32 = 131072\n","llama_model_loader: - kv 8: llama.embedding_length u32 = 4096\n","llama_model_loader: - kv 9: llama.feed_forward_length u32 = 14336\n","llama_model_loader: - kv 10: llama.attention.head_count u32 = 32\n","llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 8\n","llama_model_loader: - kv 12: llama.rope.freq_base f32 = 500000.000000\n","llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n","llama_model_loader: - kv 14: llama.attention.key_length u32 = 128\n","llama_model_loader: - kv 15: llama.attention.value_length u32 = 128\n","llama_model_loader: - kv 16: general.file_type u32 = 1\n","llama_model_loader: - kv 17: llama.vocab_size u32 = 128258\n","llama_model_loader: - kv 18: llama.rope.dimension_count u32 = 128\n","llama_model_loader: - kv 19: tokenizer.ggml.model str = gpt2\n","llama_model_loader: - kv 20: tokenizer.ggml.pre str = llama-bpe\n","llama_model_loader: - kv 21: tokenizer.ggml.tokens arr[str,128258] = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n","llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,128258] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n","llama_model_loader: - kv 23: tokenizer.ggml.merges arr[str,280147] = [\"Ġ Ġ\", \"Ġ ĠĠĠ\", \"ĠĠ ĠĠ\", \"...\n","llama_model_loader: - kv 24: tokenizer.ggml.bos_token_id u32 = 128256\n","llama_model_loader: - kv 25: tokenizer.ggml.eos_token_id 
u32 = 128257\n","llama_model_loader: - kv 26: tokenizer.ggml.padding_token_id u32 = 128257\n","llama_model_loader: - kv 27: tokenizer.chat_template str = {% for message in messages %}{{'<|im_...\n","llama_model_loader: - kv 28: general.quantization_version u32 = 2\n","llama_model_loader: - type f32: 66 tensors\n","llama_model_loader: - type f16: 226 tensors\n","[ 1/ 292] rope_freqs.weight - [ 64, 1, 1, 1], type = f32, size = 0.000 MB\n","[ 2/ 292] token_embd.weight - [ 4096, 128258, 1, 1], type = f16, converting to q4_K .. size = 1002.02 MiB -> 281.82 MiB\n","[ 3/ 292] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 4/ 292] blk.0.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 5/ 292] blk.0.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 6/ 292] blk.0.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 7/ 292] blk.0.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 8/ 292] blk.0.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 9/ 292] blk.0.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 10/ 292] blk.0.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 11/ 292] blk.0.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 12/ 292] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 13/ 292] blk.1.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 14/ 292] blk.1.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 15/ 292] blk.1.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 16/ 292] blk.1.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 17/ 292] blk.1.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 18/ 292] blk.1.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 19/ 292] blk.1.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 20/ 292] blk.1.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 21/ 292] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 22/ 292] blk.2.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 23/ 292] blk.2.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 24/ 292] blk.2.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 25/ 292] blk.2.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 26/ 292] blk.2.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 27/ 292] blk.2.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 28/ 292] blk.2.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. 
size = 32.00 MiB -> 9.00 MiB\n","[ 29/ 292] blk.2.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 30/ 292] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 31/ 292] blk.3.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 32/ 292] blk.3.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 33/ 292] blk.3.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 34/ 292] blk.3.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 35/ 292] blk.3.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 36/ 292] blk.3.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 37/ 292] blk.3.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 38/ 292] blk.3.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 39/ 292] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 40/ 292] blk.4.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 41/ 292] blk.4.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 42/ 292] blk.4.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 43/ 292] blk.4.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 44/ 292] blk.4.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 45/ 292] blk.4.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 46/ 292] blk.4.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 47/ 292] blk.4.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 48/ 292] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 49/ 292] blk.5.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 50/ 292] blk.5.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 51/ 292] blk.5.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 52/ 292] blk.5.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 53/ 292] blk.5.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 54/ 292] blk.5.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 55/ 292] blk.5.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 56/ 292] blk.5.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 57/ 292] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 58/ 292] blk.6.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 59/ 292] blk.6.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. 
size = 112.00 MiB -> 31.50 MiB\n","[ 60/ 292] blk.6.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 61/ 292] blk.6.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 62/ 292] blk.6.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 63/ 292] blk.6.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 64/ 292] blk.6.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 65/ 292] blk.6.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 66/ 292] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 67/ 292] blk.7.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 68/ 292] blk.7.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 69/ 292] blk.7.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 70/ 292] blk.7.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 71/ 292] blk.7.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 72/ 292] blk.7.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 73/ 292] blk.7.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 74/ 292] blk.7.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 75/ 292] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 76/ 292] blk.8.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 77/ 292] blk.8.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 78/ 292] blk.8.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 79/ 292] blk.8.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 80/ 292] blk.8.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 81/ 292] blk.8.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 82/ 292] blk.8.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 83/ 292] blk.8.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 84/ 292] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 85/ 292] blk.10.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 86/ 292] blk.10.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 87/ 292] blk.10.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 88/ 292] blk.10.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 89/ 292] blk.10.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. 
size = 8.00 MiB -> 2.25 MiB\n","[ 90/ 292] blk.10.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 91/ 292] blk.10.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 92/ 292] blk.10.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 93/ 292] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 94/ 292] blk.11.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 95/ 292] blk.11.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 96/ 292] blk.11.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 97/ 292] blk.11.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 98/ 292] blk.11.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 99/ 292] blk.11.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 100/ 292] blk.11.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 101/ 292] blk.11.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 102/ 292] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 103/ 292] blk.12.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 104/ 292] blk.12.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 105/ 292] blk.12.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 106/ 292] blk.12.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 107/ 292] blk.12.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 108/ 292] blk.12.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 109/ 292] blk.12.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 110/ 292] blk.12.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 111/ 292] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 112/ 292] blk.13.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 113/ 292] blk.13.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 114/ 292] blk.13.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 115/ 292] blk.13.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 116/ 292] blk.13.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 117/ 292] blk.13.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 118/ 292] blk.13.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 119/ 292] blk.13.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. 
size = 8.00 MiB -> 3.28 MiB\n","[ 120/ 292] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 121/ 292] blk.14.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 122/ 292] blk.14.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 123/ 292] blk.14.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 124/ 292] blk.14.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 125/ 292] blk.14.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 126/ 292] blk.14.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 127/ 292] blk.14.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 128/ 292] blk.14.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 129/ 292] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 130/ 292] blk.15.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 131/ 292] blk.15.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 132/ 292] blk.15.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 133/ 292] blk.15.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 134/ 292] blk.15.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 135/ 292] blk.15.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 136/ 292] blk.15.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 137/ 292] blk.15.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 138/ 292] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 139/ 292] blk.16.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 140/ 292] blk.16.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 141/ 292] blk.16.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 142/ 292] blk.16.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 143/ 292] blk.16.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 144/ 292] blk.16.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 145/ 292] blk.16.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 146/ 292] blk.16.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 147/ 292] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 148/ 292] blk.17.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 149/ 292] blk.17.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. 
size = 112.00 MiB -> 31.50 MiB\n","[ 150/ 292] blk.17.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 151/ 292] blk.17.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 152/ 292] blk.17.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 153/ 292] blk.17.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 154/ 292] blk.17.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 155/ 292] blk.17.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 156/ 292] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 157/ 292] blk.18.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 158/ 292] blk.18.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 159/ 292] blk.18.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 160/ 292] blk.18.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 161/ 292] blk.18.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 162/ 292] blk.18.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 163/ 292] blk.18.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 164/ 292] blk.18.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 165/ 292] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 166/ 292] blk.19.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 167/ 292] blk.19.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 168/ 292] blk.19.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 169/ 292] blk.19.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 170/ 292] blk.19.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 171/ 292] blk.19.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 172/ 292] blk.19.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 173/ 292] blk.19.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 174/ 292] blk.20.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 175/ 292] blk.20.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 176/ 292] blk.20.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 177/ 292] blk.20.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 178/ 292] blk.20.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. 
size = 8.00 MiB -> 2.25 MiB\n","[ 179/ 292] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 180/ 292] blk.9.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 181/ 292] blk.9.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 182/ 292] blk.9.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 183/ 292] blk.9.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 184/ 292] blk.9.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 185/ 292] blk.9.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 186/ 292] blk.9.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 187/ 292] blk.9.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 188/ 292] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 189/ 292] blk.20.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 190/ 292] blk.20.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 191/ 292] blk.20.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 192/ 292] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 193/ 292] blk.21.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 194/ 292] blk.21.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 195/ 292] blk.21.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 196/ 292] blk.21.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 197/ 292] blk.21.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 198/ 292] blk.21.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 199/ 292] blk.21.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 200/ 292] blk.21.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 201/ 292] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 202/ 292] blk.22.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 203/ 292] blk.22.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 204/ 292] blk.22.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 205/ 292] blk.22.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 206/ 292] blk.22.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 207/ 292] blk.22.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 208/ 292] blk.22.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. 
size = 32.00 MiB -> 9.00 MiB\n","[ 209/ 292] blk.22.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 210/ 292] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 211/ 292] blk.23.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 212/ 292] blk.23.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 213/ 292] blk.23.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 214/ 292] blk.23.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 215/ 292] blk.23.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 216/ 292] blk.23.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 217/ 292] blk.23.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 218/ 292] blk.23.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 219/ 292] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 220/ 292] blk.24.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 221/ 292] blk.24.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 222/ 292] blk.24.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 223/ 292] blk.24.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 224/ 292] blk.24.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 225/ 292] blk.24.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 226/ 292] blk.24.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 227/ 292] blk.24.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 228/ 292] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 229/ 292] blk.25.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 230/ 292] blk.25.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 231/ 292] blk.25.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 232/ 292] blk.25.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 233/ 292] blk.25.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 234/ 292] blk.25.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 235/ 292] blk.25.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 236/ 292] blk.25.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 237/ 292] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 238/ 292] blk.26.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q4_K .. 
size = 112.00 MiB -> 31.50 MiB\n","[ 239/ 292] blk.26.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 240/ 292] blk.26.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 241/ 292] blk.26.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 242/ 292] blk.26.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 243/ 292] blk.26.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 244/ 292] blk.26.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 245/ 292] blk.26.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 246/ 292] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 247/ 292] blk.27.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 248/ 292] blk.27.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 249/ 292] blk.27.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 250/ 292] blk.27.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 251/ 292] blk.27.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 252/ 292] blk.27.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 253/ 292] blk.27.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 254/ 292] blk.27.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 255/ 292] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 256/ 292] blk.28.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 257/ 292] blk.28.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 258/ 292] blk.28.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 259/ 292] blk.28.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 260/ 292] blk.28.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 261/ 292] blk.28.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 262/ 292] blk.28.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 263/ 292] blk.28.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 264/ 292] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 265/ 292] blk.29.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 266/ 292] blk.29.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 267/ 292] blk.29.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. 
size = 112.00 MiB -> 31.50 MiB\n","[ 268/ 292] blk.29.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 269/ 292] blk.29.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 270/ 292] blk.29.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 271/ 292] blk.29.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 272/ 292] blk.29.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 273/ 292] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 274/ 292] blk.30.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 275/ 292] blk.30.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 276/ 292] blk.30.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 277/ 292] blk.30.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 278/ 292] blk.30.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 279/ 292] blk.30.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 280/ 292] blk.30.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 281/ 292] blk.30.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 282/ 292] blk.31.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 283/ 292] blk.31.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q4_K .. size = 112.00 MiB -> 31.50 MiB\n","[ 284/ 292] blk.31.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q4_K .. size = 8.00 MiB -> 2.25 MiB\n","[ 285/ 292] blk.31.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 286/ 292] blk.31.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q4_K .. size = 32.00 MiB -> 9.00 MiB\n","[ 287/ 292] blk.31.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 288/ 292] output.weight - [ 4096, 128258, 1, 1], type = f16, converting to q6_K .. size = 1002.02 MiB -> 410.98 MiB\n","[ 289/ 292] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 290/ 292] blk.31.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. 
size = 112.00 MiB -> 45.94 MiB\n","[ 291/ 292] blk.31.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 292/ 292] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","llama_model_quantize_internal: model size = 15317.05 MB\n","llama_model_quantize_internal: quant size = 4685.32 MB\n","\n","main: quantize time = 477445.76 ms\n","main: total time = 477445.76 ms\n"]}],"source":["%cd /kaggle/working/\n","\n","!./llama.cpp/llama-quantize /kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf llama-3.1-8b-chat-math-teacher-Q4_K_M.gguf Q4_K_M"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T16:00:12.636327Z","iopub.status.busy":"2024-10-21T16:00:12.635944Z","iopub.status.idle":"2024-10-21T16:00:26.605944Z","shell.execute_reply":"2024-10-21T16:00:26.605019Z","shell.execute_reply.started":"2024-10-21T16:00:12.636283Z"},"trusted":true},"outputs":[],"source":["from huggingface_hub import login\n","from kaggle_secrets import UserSecretsClient\n","from huggingface_hub import HfApi\n","user_secrets = UserSecretsClient()\n","hf_token = user_secrets.get_secret(\"HF_TOKEN\")\n","login(token = hf_token)\n","\n","api = HfApi()\n","api.upload_file(\n"," path_or_fileobj=\"/kaggle/working/llama-3.1-8b-chat-math-teacher-Q4_K_M.gguf\",\n"," path_in_repo=\"llama-3.1-8b-chat-math-teacher-Q4_K_M.gguf\",\n"," repo_id=\"ccapo/llama-3.1-8b-chat-math-teacher-GGUF\",\n"," repo_type=\"model\",\n",")"]},{"cell_type":"code","execution_count":3,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T20:01:23.254013Z","iopub.status.busy":"2024-10-21T20:01:23.253672Z","iopub.status.idle":"2024-10-21T20:08:36.313423Z","shell.execute_reply":"2024-10-21T20:08:36.312210Z","shell.execute_reply.started":"2024-10-21T20:01:23.253978Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["/kaggle/working\n","main: build = 1 (994cfb1)\n","main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n","main: quantizing '/kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf' to 'llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf' as Q5_K_M\n","llama_model_loader: loaded meta data with 29 key-value pairs and 292 tensors from /kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv 0: general.architecture str = llama\n","llama_model_loader: - kv 1: general.type str = model\n","llama_model_loader: - kv 2: general.name str = Llama 3.1 8b Chat Math Teacher\n","llama_model_loader: - kv 3: general.finetune str = chat-math-teacher\n","llama_model_loader: - kv 4: general.basename str = llama-3.1\n","llama_model_loader: - kv 5: general.size_label str = 8B\n","llama_model_loader: - kv 6: llama.block_count u32 = 32\n","llama_model_loader: - kv 7: llama.context_length u32 = 131072\n","llama_model_loader: - kv 8: llama.embedding_length u32 = 4096\n","llama_model_loader: - kv 9: llama.feed_forward_length u32 = 14336\n","llama_model_loader: - kv 10: llama.attention.head_count u32 = 32\n","llama_model_loader: - kv 11: llama.attention.head_count_kv u32 = 8\n","llama_model_loader: - kv 12: llama.rope.freq_base f32 = 500000.000000\n","llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n","llama_model_loader: - kv 14: llama.attention.key_length u32 = 128\n","llama_model_loader: - kv 15: llama.attention.value_length u32 = 128\n","llama_model_loader: - kv 16: general.file_type u32 = 1\n","llama_model_loader: - kv 17: llama.vocab_size u32 = 128258\n","llama_model_loader: - kv 18: llama.rope.dimension_count u32 = 128\n","llama_model_loader: - kv 19: tokenizer.ggml.model str = gpt2\n","llama_model_loader: - kv 20: tokenizer.ggml.pre str = llama-bpe\n","llama_model_loader: - kv 21: tokenizer.ggml.tokens arr[str,128258] = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n","llama_model_loader: - kv 22: tokenizer.ggml.token_type arr[i32,128258] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n","llama_model_loader: - kv 23: tokenizer.ggml.merges arr[str,280147] = [\"Ġ Ġ\", \"Ġ ĠĠĠ\", \"ĠĠ ĠĠ\", \"...\n","llama_model_loader: - kv 24: tokenizer.ggml.bos_token_id u32 = 128256\n","llama_model_loader: - kv 25: tokenizer.ggml.eos_token_id u32 = 128257\n","llama_model_loader: - kv 26: tokenizer.ggml.padding_token_id u32 = 128257\n","llama_model_loader: - kv 27: tokenizer.chat_template str = {% for message in messages %}{{'<|im_...\n","llama_model_loader: - kv 28: general.quantization_version u32 = 2\n","llama_model_loader: - type f32: 66 tensors\n","llama_model_loader: - type f16: 226 tensors\n","[ 1/ 292] rope_freqs.weight - [ 64, 1, 1, 1], type = f32, size = 0.000 MB\n","[ 2/ 292] token_embd.weight - [ 4096, 128258, 1, 1], type = f16, converting to q5_K .. size = 1002.02 MiB -> 344.44 MiB\n","[ 3/ 292] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 4/ 292] blk.0.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 5/ 292] blk.0.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 6/ 292] blk.0.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 7/ 292] blk.0.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 8/ 292] blk.0.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 9/ 292] blk.0.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 10/ 292] blk.0.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 11/ 292] blk.0.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. 
size = 8.00 MiB -> 3.28 MiB\n","[ 12/ 292] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 13/ 292] blk.1.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 14/ 292] blk.1.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 15/ 292] blk.1.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 16/ 292] blk.1.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 17/ 292] blk.1.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 18/ 292] blk.1.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 19/ 292] blk.1.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 20/ 292] blk.1.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 21/ 292] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 22/ 292] blk.2.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 23/ 292] blk.2.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 24/ 292] blk.2.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 25/ 292] blk.2.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 26/ 292] blk.2.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 27/ 292] blk.2.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 28/ 292] blk.2.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 29/ 292] blk.2.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 30/ 292] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 31/ 292] blk.3.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 32/ 292] blk.3.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 33/ 292] blk.3.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 34/ 292] blk.3.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 35/ 292] blk.3.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 36/ 292] blk.3.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 37/ 292] blk.3.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 38/ 292] blk.3.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 39/ 292] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 40/ 292] blk.4.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 41/ 292] blk.4.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. 
size = 112.00 MiB -> 38.50 MiB\n","[ 42/ 292] blk.4.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 43/ 292] blk.4.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 44/ 292] blk.4.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 45/ 292] blk.4.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 46/ 292] blk.4.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 47/ 292] blk.4.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 48/ 292] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 49/ 292] blk.5.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 50/ 292] blk.5.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 51/ 292] blk.5.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 52/ 292] blk.5.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 53/ 292] blk.5.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 54/ 292] blk.5.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 55/ 292] blk.5.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 56/ 292] blk.5.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 57/ 292] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 58/ 292] blk.6.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 59/ 292] blk.6.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 60/ 292] blk.6.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 61/ 292] blk.6.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 62/ 292] blk.6.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 63/ 292] blk.6.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 64/ 292] blk.6.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 65/ 292] blk.6.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 66/ 292] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 67/ 292] blk.7.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 68/ 292] blk.7.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 69/ 292] blk.7.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 70/ 292] blk.7.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 71/ 292] blk.7.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. 
size = 8.00 MiB -> 2.75 MiB\n","[ 72/ 292] blk.7.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 73/ 292] blk.7.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 74/ 292] blk.7.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 75/ 292] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 76/ 292] blk.8.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 77/ 292] blk.8.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 78/ 292] blk.8.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 79/ 292] blk.8.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 80/ 292] blk.8.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 81/ 292] blk.8.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 82/ 292] blk.8.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 83/ 292] blk.8.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 84/ 292] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 85/ 292] blk.10.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 86/ 292] blk.10.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 87/ 292] blk.10.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 88/ 292] blk.10.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 89/ 292] blk.10.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 90/ 292] blk.10.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 91/ 292] blk.10.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 92/ 292] blk.10.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 93/ 292] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 94/ 292] blk.11.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 95/ 292] blk.11.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 96/ 292] blk.11.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 97/ 292] blk.11.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 98/ 292] blk.11.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 99/ 292] blk.11.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 100/ 292] blk.11.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 101/ 292] blk.11.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. 
size = 8.00 MiB -> 2.75 MiB\n","[ 102/ 292] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 103/ 292] blk.12.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 104/ 292] blk.12.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 105/ 292] blk.12.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 106/ 292] blk.12.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 107/ 292] blk.12.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 108/ 292] blk.12.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 109/ 292] blk.12.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 110/ 292] blk.12.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 111/ 292] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 112/ 292] blk.13.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 113/ 292] blk.13.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 114/ 292] blk.13.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 115/ 292] blk.13.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 116/ 292] blk.13.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 117/ 292] blk.13.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 118/ 292] blk.13.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 119/ 292] blk.13.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 120/ 292] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 121/ 292] blk.14.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 122/ 292] blk.14.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 123/ 292] blk.14.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 124/ 292] blk.14.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 125/ 292] blk.14.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 126/ 292] blk.14.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 127/ 292] blk.14.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 128/ 292] blk.14.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 129/ 292] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 130/ 292] blk.15.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 131/ 292] blk.15.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. 
size = 112.00 MiB -> 38.50 MiB\n","[ 132/ 292] blk.15.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 133/ 292] blk.15.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 134/ 292] blk.15.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 135/ 292] blk.15.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 136/ 292] blk.15.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 137/ 292] blk.15.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 138/ 292] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 139/ 292] blk.16.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 140/ 292] blk.16.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 141/ 292] blk.16.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 142/ 292] blk.16.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 143/ 292] blk.16.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 144/ 292] blk.16.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 145/ 292] blk.16.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 146/ 292] blk.16.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 147/ 292] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 148/ 292] blk.17.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 149/ 292] blk.17.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 150/ 292] blk.17.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 151/ 292] blk.17.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 152/ 292] blk.17.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 153/ 292] blk.17.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 154/ 292] blk.17.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 155/ 292] blk.17.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 156/ 292] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 157/ 292] blk.18.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 158/ 292] blk.18.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 159/ 292] blk.18.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 160/ 292] blk.18.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 161/ 292] blk.18.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. 
size = 8.00 MiB -> 2.75 MiB\n","[ 162/ 292] blk.18.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 163/ 292] blk.18.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 164/ 292] blk.18.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 165/ 292] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 166/ 292] blk.19.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 167/ 292] blk.19.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 168/ 292] blk.19.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 169/ 292] blk.19.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 170/ 292] blk.19.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 171/ 292] blk.19.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 172/ 292] blk.19.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 173/ 292] blk.19.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 174/ 292] blk.20.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 175/ 292] blk.20.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 176/ 292] blk.20.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 177/ 292] blk.20.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 178/ 292] blk.20.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 179/ 292] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 180/ 292] blk.9.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 181/ 292] blk.9.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 182/ 292] blk.9.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 183/ 292] blk.9.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 184/ 292] blk.9.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 185/ 292] blk.9.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 186/ 292] blk.9.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 187/ 292] blk.9.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 188/ 292] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 189/ 292] blk.20.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 190/ 292] blk.20.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. 
size = 112.00 MiB -> 38.50 MiB\n","[ 191/ 292] blk.20.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 192/ 292] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 193/ 292] blk.21.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 194/ 292] blk.21.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 195/ 292] blk.21.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 196/ 292] blk.21.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 197/ 292] blk.21.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 198/ 292] blk.21.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 199/ 292] blk.21.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 200/ 292] blk.21.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 201/ 292] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 202/ 292] blk.22.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 203/ 292] blk.22.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 204/ 292] blk.22.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 205/ 292] blk.22.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 206/ 292] blk.22.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 207/ 292] blk.22.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 208/ 292] blk.22.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 209/ 292] blk.22.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 210/ 292] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 211/ 292] blk.23.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 212/ 292] blk.23.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 213/ 292] blk.23.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 214/ 292] blk.23.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 215/ 292] blk.23.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 216/ 292] blk.23.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 217/ 292] blk.23.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 218/ 292] blk.23.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 219/ 292] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 220/ 292] blk.24.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. 
size = 112.00 MiB -> 45.94 MiB\n","[ 221/ 292] blk.24.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 222/ 292] blk.24.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 223/ 292] blk.24.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 224/ 292] blk.24.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 225/ 292] blk.24.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 226/ 292] blk.24.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 227/ 292] blk.24.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 228/ 292] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 229/ 292] blk.25.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 230/ 292] blk.25.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 231/ 292] blk.25.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 232/ 292] blk.25.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 233/ 292] blk.25.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 234/ 292] blk.25.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 235/ 292] blk.25.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 236/ 292] blk.25.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 237/ 292] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 238/ 292] blk.26.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 239/ 292] blk.26.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 240/ 292] blk.26.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 241/ 292] blk.26.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 242/ 292] blk.26.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 243/ 292] blk.26.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 244/ 292] blk.26.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 245/ 292] blk.26.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 246/ 292] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 247/ 292] blk.27.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 248/ 292] blk.27.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 249/ 292] blk.27.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. 
size = 112.00 MiB -> 38.50 MiB\n","[ 250/ 292] blk.27.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 251/ 292] blk.27.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 252/ 292] blk.27.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 253/ 292] blk.27.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 254/ 292] blk.27.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 255/ 292] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 256/ 292] blk.28.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 257/ 292] blk.28.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 258/ 292] blk.28.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 259/ 292] blk.28.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 260/ 292] blk.28.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 261/ 292] blk.28.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 262/ 292] blk.28.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 263/ 292] blk.28.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 264/ 292] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 265/ 292] blk.29.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 266/ 292] blk.29.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 267/ 292] blk.29.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 268/ 292] blk.29.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 269/ 292] blk.29.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 270/ 292] blk.29.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 271/ 292] blk.29.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 272/ 292] blk.29.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 273/ 292] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 274/ 292] blk.30.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 275/ 292] blk.30.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 276/ 292] blk.30.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 277/ 292] blk.30.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 278/ 292] blk.30.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 279/ 292] blk.30.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 280/ 292] blk.30.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 281/ 292] blk.30.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 282/ 292] blk.31.ffn_gate.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 283/ 292] blk.31.ffn_up.weight - [ 4096, 14336, 1, 1], type = f16, converting to q5_K .. size = 112.00 MiB -> 38.50 MiB\n","[ 284/ 292] blk.31.attn_k.weight - [ 4096, 1024, 1, 1], type = f16, converting to q5_K .. size = 8.00 MiB -> 2.75 MiB\n","[ 285/ 292] blk.31.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 286/ 292] blk.31.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, converting to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 287/ 292] blk.31.attn_v.weight - [ 4096, 1024, 1, 1], type = f16, converting to q6_K .. size = 8.00 MiB -> 3.28 MiB\n","[ 288/ 292] output.weight - [ 4096, 128258, 1, 1], type = f16, converting to q6_K .. size = 1002.02 MiB -> 410.98 MiB\n","[ 289/ 292] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 290/ 292] blk.31.ffn_down.weight - [14336, 4096, 1, 1], type = f16, converting to q6_K .. size = 112.00 MiB -> 45.94 MiB\n","[ 291/ 292] blk.31.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 292/ 292] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","llama_model_quantize_internal: model size = 15317.05 MB\n","llama_model_quantize_internal: quant size = 5459.94 MB\n","\n","main: quantize time = 431584.61 ms\n","main: total time = 431584.61 ms\n"]}],"source":["%cd /kaggle/working/\n","\n","!./llama.cpp/llama-quantize /kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf Q5_K_M"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T20:08:36.315241Z","iopub.status.busy":"2024-10-21T20:08:36.314900Z","iopub.status.idle":"2024-10-21T20:11:08.853556Z","shell.execute_reply":"2024-10-21T20:11:08.852606Z","shell.execute_reply.started":"2024-10-21T20:08:36.315205Z"},"trusted":true},"outputs":[],"source":["from huggingface_hub import HfApi, login\n","from kaggle_secrets import UserSecretsClient\n","\n","# Read the Hugging Face token stored as a Kaggle secret and log in\n","user_secrets = UserSecretsClient()\n","hf_token = user_secrets.get_secret(\"HF_TOKEN\")\n","login(token=hf_token)\n","\n","# Upload the quantized GGUF to the Hugging Face model repository\n","api = HfApi()\n","api.upload_file(\n","    path_or_fileobj=\"/kaggle/working/llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf\",\n","    path_in_repo=\"llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf\",\n","    repo_id=\"ccapo/llama-3.1-8b-chat-math-teacher-GGUF\",\n","    repo_type=\"model\",\n",")"]}
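,{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["# Optional smoke test (added sketch, not part of the original run): confirm the\n","# quantized GGUF exists, report its size, and generate a few tokens with llama-cli.\n","# Assumes llama-cli was built alongside llama-quantize in /kaggle/working/llama.cpp.\n","import os\n","\n","gguf_path = \"/kaggle/working/llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf\"\n","print(f\"{gguf_path}: {os.path.getsize(gguf_path) / 2**30:.2f} GiB\")\n","\n","# -ngl 99 offloads all layers to the GPU; lower -n for a quicker check.\n","!./llama.cpp/llama-cli -m {gguf_path} -p \"What is 12 * 7?\" -n 32 -ngl 99"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.14"}},"nbformat":4,"nbformat_minor":4}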