Spaces:

ccapo
/

portfolio

Running

File size: 99,212 Bytes

564354e

{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T19:51:11.362685Z","iopub.status.busy":"2024-10-21T19:51:11.362279Z","iopub.status.idle":"2024-10-21T20:01:23.251497Z","shell.execute_reply":"2024-10-21T20:01:23.250324Z","shell.execute_reply.started":"2024-10-21T19:51:11.362647Z"},"trusted":true},"outputs":[],"source":["%cd /kaggle/working\n","!git clone --depth=1 https://github.com/ggerganov/llama.cpp.git\n","%cd /kaggle/working/llama.cpp\n","!sed -i 's|MK_LDFLAGS   += -lcuda|MK_LDFLAGS   += -L/usr/local/nvidia/lib64 -lcuda|' Makefile\n","!LLAMA_CUDA=1 conda run -n base make -j > /dev/null"]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T15:49:55.214707Z","iopub.status.busy":"2024-10-21T15:49:55.214216Z","iopub.status.idle":"2024-10-21T15:57:54.086080Z","shell.execute_reply":"2024-10-21T15:57:54.084916Z","shell.execute_reply.started":"2024-10-21T15:49:55.214666Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["/kaggle/working\n","main: build = 1 (f594bc8)\n","main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n","main: quantizing '/kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf' to 'llama-3.1-8b-chat-math-teacher-Q4_K_M.gguf' as Q4_K_M\n","llama_model_loader: loaded meta data with 29 key-value pairs and 292 tensors from /kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv   0:                       general.architecture str              = llama\n","llama_model_loader: - kv   1:                               general.type str              = model\n","llama_model_loader: - kv   2:                               general.name str              = Llama 3.1 8b Chat Math Teacher\n","llama_model_loader: - kv   3:                           general.finetune str              = chat-math-teacher\n","llama_model_loader: - kv   4:                           general.basename str              = llama-3.1\n","llama_model_loader: - kv   5:                         general.size_label str              = 8B\n","llama_model_loader: - kv   6:                          llama.block_count u32              = 32\n","llama_model_loader: - kv   7:                       llama.context_length u32              = 131072\n","llama_model_loader: - kv   8:                     llama.embedding_length u32              = 4096\n","llama_model_loader: - kv   9:                  llama.feed_forward_length u32              = 14336\n","llama_model_loader: - kv  10:                 llama.attention.head_count u32              = 32\n","llama_model_loader: - kv  11:              llama.attention.head_count_kv u32              = 8\n","llama_model_loader: - kv  12:                       llama.rope.freq_base f32              = 500000.000000\n","llama_model_loader: - kv  13:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010\n","llama_model_loader: - kv  14:                 llama.attention.key_length u32              = 128\n","llama_model_loader: - kv  15:               llama.attention.value_length u32              = 128\n","llama_model_loader: - kv  16:                          general.file_type u32              = 1\n","llama_model_loader: - kv  17:                           llama.vocab_size u32              = 128258\n","llama_model_loader: - kv  18:                 llama.rope.dimension_count u32              = 128\n","llama_model_loader: - kv  19:                       tokenizer.ggml.model str              = gpt2\n","llama_model_loader: - kv  20:                         tokenizer.ggml.pre str              = llama-bpe\n","llama_model_loader: - kv  21:                      tokenizer.ggml.tokens arr[str,128258]  = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n","llama_model_loader: - kv  22:                  tokenizer.ggml.token_type arr[i32,128258]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n","llama_model_loader: - kv  23:                      tokenizer.ggml.merges arr[str,280147]  = [\"Ġ Ġ\", \"Ġ ĠĠĠ\", \"ĠĠ ĠĠ\", \"...\n","llama_model_loader: - kv  24:                tokenizer.ggml.bos_token_id u32              = 128256\n","llama_model_loader: - kv  25:                tokenizer.ggml.eos_token_id u32              = 128257\n","llama_model_loader: - kv  26:            tokenizer.ggml.padding_token_id u32              = 128257\n","llama_model_loader: - kv  27:                    tokenizer.chat_template str              = {% for message in messages %}{{'<|im_...\n","llama_model_loader: - kv  28:               general.quantization_version u32              = 2\n","llama_model_loader: - type  f32:   66 tensors\n","llama_model_loader: - type  f16:  226 tensors\n","[   1/ 292]                    rope_freqs.weight - [   64,     1,     1,     1], type =    f32, size =    0.000 MB\n","[   2/ 292]                    token_embd.weight - [ 4096, 128258,     1,     1], type =    f16, converting to q4_K .. size =  1002.02 MiB ->   281.82 MiB\n","[   3/ 292]               blk.0.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[   4/ 292]                blk.0.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[   5/ 292]                blk.0.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[   6/ 292]                  blk.0.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[   7/ 292]                blk.0.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[   8/ 292]                  blk.0.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[   9/ 292]             blk.0.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  10/ 292]                  blk.0.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  11/ 292]                  blk.0.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  12/ 292]               blk.1.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  13/ 292]                blk.1.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  14/ 292]                blk.1.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  15/ 292]                  blk.1.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  16/ 292]                blk.1.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  17/ 292]                  blk.1.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  18/ 292]             blk.1.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  19/ 292]                  blk.1.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  20/ 292]                  blk.1.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  21/ 292]               blk.2.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  22/ 292]                blk.2.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  23/ 292]                blk.2.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  24/ 292]                  blk.2.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  25/ 292]                blk.2.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  26/ 292]                  blk.2.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  27/ 292]             blk.2.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  28/ 292]                  blk.2.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  29/ 292]                  blk.2.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  30/ 292]               blk.3.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  31/ 292]                blk.3.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  32/ 292]                blk.3.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  33/ 292]                  blk.3.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  34/ 292]                blk.3.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  35/ 292]                  blk.3.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  36/ 292]             blk.3.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  37/ 292]                  blk.3.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  38/ 292]                  blk.3.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  39/ 292]               blk.4.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  40/ 292]                blk.4.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  41/ 292]                blk.4.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  42/ 292]                  blk.4.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  43/ 292]                blk.4.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  44/ 292]                  blk.4.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  45/ 292]             blk.4.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  46/ 292]                  blk.4.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  47/ 292]                  blk.4.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  48/ 292]               blk.5.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  49/ 292]                blk.5.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  50/ 292]                blk.5.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  51/ 292]                  blk.5.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  52/ 292]                blk.5.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  53/ 292]                  blk.5.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  54/ 292]             blk.5.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  55/ 292]                  blk.5.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  56/ 292]                  blk.5.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  57/ 292]               blk.6.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  58/ 292]                blk.6.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  59/ 292]                blk.6.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  60/ 292]                  blk.6.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  61/ 292]                blk.6.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  62/ 292]                  blk.6.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  63/ 292]             blk.6.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  64/ 292]                  blk.6.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  65/ 292]                  blk.6.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  66/ 292]               blk.7.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  67/ 292]                blk.7.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  68/ 292]                blk.7.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  69/ 292]                  blk.7.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  70/ 292]                blk.7.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  71/ 292]                  blk.7.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  72/ 292]             blk.7.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  73/ 292]                  blk.7.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  74/ 292]                  blk.7.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  75/ 292]               blk.8.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  76/ 292]                blk.8.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  77/ 292]                blk.8.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  78/ 292]                  blk.8.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  79/ 292]                blk.8.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  80/ 292]                  blk.8.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  81/ 292]             blk.8.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  82/ 292]                  blk.8.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  83/ 292]                  blk.8.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  84/ 292]              blk.10.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  85/ 292]               blk.10.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  86/ 292]               blk.10.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  87/ 292]                 blk.10.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  88/ 292]               blk.10.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  89/ 292]                 blk.10.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  90/ 292]            blk.10.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  91/ 292]                 blk.10.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[  92/ 292]                 blk.10.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  93/ 292]              blk.11.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  94/ 292]               blk.11.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  95/ 292]               blk.11.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  96/ 292]                 blk.11.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[  97/ 292]               blk.11.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  98/ 292]                 blk.11.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[  99/ 292]            blk.11.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 100/ 292]                 blk.11.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 101/ 292]                 blk.11.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 102/ 292]              blk.12.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 103/ 292]               blk.12.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 104/ 292]               blk.12.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 105/ 292]                 blk.12.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 106/ 292]               blk.12.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 107/ 292]                 blk.12.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 108/ 292]            blk.12.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 109/ 292]                 blk.12.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 110/ 292]                 blk.12.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 111/ 292]              blk.13.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 112/ 292]               blk.13.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 113/ 292]               blk.13.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 114/ 292]                 blk.13.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 115/ 292]               blk.13.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 116/ 292]                 blk.13.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 117/ 292]            blk.13.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 118/ 292]                 blk.13.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 119/ 292]                 blk.13.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 120/ 292]              blk.14.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 121/ 292]               blk.14.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 122/ 292]               blk.14.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 123/ 292]                 blk.14.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 124/ 292]               blk.14.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 125/ 292]                 blk.14.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 126/ 292]            blk.14.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 127/ 292]                 blk.14.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 128/ 292]                 blk.14.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 129/ 292]              blk.15.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 130/ 292]               blk.15.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 131/ 292]               blk.15.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 132/ 292]                 blk.15.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 133/ 292]               blk.15.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 134/ 292]                 blk.15.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 135/ 292]            blk.15.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 136/ 292]                 blk.15.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 137/ 292]                 blk.15.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 138/ 292]              blk.16.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 139/ 292]               blk.16.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 140/ 292]               blk.16.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 141/ 292]                 blk.16.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 142/ 292]               blk.16.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 143/ 292]                 blk.16.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 144/ 292]            blk.16.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 145/ 292]                 blk.16.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 146/ 292]                 blk.16.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 147/ 292]              blk.17.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 148/ 292]               blk.17.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 149/ 292]               blk.17.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 150/ 292]                 blk.17.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 151/ 292]               blk.17.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 152/ 292]                 blk.17.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 153/ 292]            blk.17.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 154/ 292]                 blk.17.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 155/ 292]                 blk.17.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 156/ 292]              blk.18.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 157/ 292]               blk.18.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 158/ 292]               blk.18.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 159/ 292]                 blk.18.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 160/ 292]               blk.18.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 161/ 292]                 blk.18.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 162/ 292]            blk.18.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 163/ 292]                 blk.18.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 164/ 292]                 blk.18.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 165/ 292]              blk.19.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 166/ 292]               blk.19.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 167/ 292]               blk.19.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 168/ 292]                 blk.19.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 169/ 292]               blk.19.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 170/ 292]                 blk.19.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 171/ 292]            blk.19.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 172/ 292]                 blk.19.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 173/ 292]                 blk.19.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 174/ 292]               blk.20.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 175/ 292]                 blk.20.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 176/ 292]            blk.20.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 177/ 292]                 blk.20.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 178/ 292]                 blk.20.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 179/ 292]               blk.9.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 180/ 292]                blk.9.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 181/ 292]                blk.9.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 182/ 292]                  blk.9.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 183/ 292]                blk.9.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 184/ 292]                  blk.9.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 185/ 292]             blk.9.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 186/ 292]                  blk.9.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 187/ 292]                  blk.9.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 188/ 292]              blk.20.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 189/ 292]               blk.20.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 190/ 292]                 blk.20.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 191/ 292]               blk.20.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 192/ 292]              blk.21.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 193/ 292]               blk.21.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 194/ 292]               blk.21.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 195/ 292]                 blk.21.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 196/ 292]               blk.21.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 197/ 292]                 blk.21.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 198/ 292]            blk.21.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 199/ 292]                 blk.21.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 200/ 292]                 blk.21.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 201/ 292]              blk.22.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 202/ 292]               blk.22.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 203/ 292]               blk.22.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 204/ 292]                 blk.22.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 205/ 292]               blk.22.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 206/ 292]                 blk.22.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 207/ 292]            blk.22.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 208/ 292]                 blk.22.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 209/ 292]                 blk.22.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 210/ 292]              blk.23.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 211/ 292]               blk.23.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 212/ 292]               blk.23.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 213/ 292]                 blk.23.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 214/ 292]               blk.23.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 215/ 292]                 blk.23.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 216/ 292]            blk.23.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 217/ 292]                 blk.23.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 218/ 292]                 blk.23.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 219/ 292]              blk.24.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 220/ 292]               blk.24.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 221/ 292]               blk.24.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 222/ 292]                 blk.24.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 223/ 292]               blk.24.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 224/ 292]                 blk.24.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 225/ 292]            blk.24.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 226/ 292]                 blk.24.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 227/ 292]                 blk.24.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 228/ 292]              blk.25.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 229/ 292]               blk.25.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 230/ 292]               blk.25.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 231/ 292]                 blk.25.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 232/ 292]               blk.25.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 233/ 292]                 blk.25.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 234/ 292]            blk.25.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 235/ 292]                 blk.25.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 236/ 292]                 blk.25.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 237/ 292]              blk.26.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 238/ 292]               blk.26.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 239/ 292]               blk.26.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 240/ 292]                 blk.26.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 241/ 292]               blk.26.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 242/ 292]                 blk.26.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 243/ 292]            blk.26.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 244/ 292]                 blk.26.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 245/ 292]                 blk.26.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 246/ 292]              blk.27.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 247/ 292]               blk.27.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 248/ 292]               blk.27.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 249/ 292]                 blk.27.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 250/ 292]               blk.27.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 251/ 292]                 blk.27.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 252/ 292]            blk.27.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 253/ 292]                 blk.27.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 254/ 292]                 blk.27.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 255/ 292]              blk.28.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 256/ 292]               blk.28.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 257/ 292]               blk.28.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 258/ 292]                 blk.28.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 259/ 292]               blk.28.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 260/ 292]                 blk.28.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 261/ 292]            blk.28.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 262/ 292]                 blk.28.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 263/ 292]                 blk.28.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 264/ 292]              blk.29.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 265/ 292]               blk.29.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 266/ 292]               blk.29.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 267/ 292]                 blk.29.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 268/ 292]               blk.29.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 269/ 292]                 blk.29.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 270/ 292]            blk.29.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 271/ 292]                 blk.29.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 272/ 292]                 blk.29.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 273/ 292]              blk.30.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 274/ 292]               blk.30.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 275/ 292]               blk.30.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 276/ 292]                 blk.30.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 277/ 292]               blk.30.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 278/ 292]                 blk.30.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 279/ 292]            blk.30.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 280/ 292]                 blk.30.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 281/ 292]                 blk.30.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 282/ 292]               blk.31.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 283/ 292]                 blk.31.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q4_K .. size =   112.00 MiB ->    31.50 MiB\n","[ 284/ 292]                 blk.31.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q4_K .. size =     8.00 MiB ->     2.25 MiB\n","[ 285/ 292]            blk.31.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 286/ 292]                 blk.31.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q4_K .. size =    32.00 MiB ->     9.00 MiB\n","[ 287/ 292]                 blk.31.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 288/ 292]                        output.weight - [ 4096, 128258,     1,     1], type =    f16, converting to q6_K .. size =  1002.02 MiB ->   410.98 MiB\n","[ 289/ 292]              blk.31.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 290/ 292]               blk.31.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 291/ 292]               blk.31.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 292/ 292]                   output_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","llama_model_quantize_internal: model size  = 15317.05 MB\n","llama_model_quantize_internal: quant size  =  4685.32 MB\n","\n","main: quantize time = 477445.76 ms\n","main:    total time = 477445.76 ms\n"]}],"source":["%cd /kaggle/working/\n","\n","!./llama.cpp/llama-quantize /kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf llama-3.1-8b-chat-math-teacher-Q4_K_M.gguf Q4_K_M"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T16:00:12.636327Z","iopub.status.busy":"2024-10-21T16:00:12.635944Z","iopub.status.idle":"2024-10-21T16:00:26.605944Z","shell.execute_reply":"2024-10-21T16:00:26.605019Z","shell.execute_reply.started":"2024-10-21T16:00:12.636283Z"},"trusted":true},"outputs":[],"source":["from huggingface_hub import login\n","from kaggle_secrets import UserSecretsClient\n","from huggingface_hub import HfApi\n","user_secrets = UserSecretsClient()\n","hf_token = user_secrets.get_secret(\"HF_TOKEN\")\n","login(token = hf_token)\n","\n","api = HfApi()\n","api.upload_file(\n","    path_or_fileobj=\"/kaggle/working/llama-3.1-8b-chat-math-teacher-Q4_K_M.gguf\",\n","    path_in_repo=\"llama-3.1-8b-chat-math-teacher-Q4_K_M.gguf\",\n","    repo_id=\"ccapo/llama-3.1-8b-chat-math-teacher-GGUF\",\n","    repo_type=\"model\",\n",")"]},{"cell_type":"code","execution_count":3,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T20:01:23.254013Z","iopub.status.busy":"2024-10-21T20:01:23.253672Z","iopub.status.idle":"2024-10-21T20:08:36.313423Z","shell.execute_reply":"2024-10-21T20:08:36.312210Z","shell.execute_reply.started":"2024-10-21T20:01:23.253978Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["/kaggle/working\n","main: build = 1 (994cfb1)\n","main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n","main: quantizing '/kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf' to 'llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf' as Q5_K_M\n","llama_model_loader: loaded meta data with 29 key-value pairs and 292 tensors from /kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv   0:                       general.architecture str              = llama\n","llama_model_loader: - kv   1:                               general.type str              = model\n","llama_model_loader: - kv   2:                               general.name str              = Llama 3.1 8b Chat Math Teacher\n","llama_model_loader: - kv   3:                           general.finetune str              = chat-math-teacher\n","llama_model_loader: - kv   4:                           general.basename str              = llama-3.1\n","llama_model_loader: - kv   5:                         general.size_label str              = 8B\n","llama_model_loader: - kv   6:                          llama.block_count u32              = 32\n","llama_model_loader: - kv   7:                       llama.context_length u32              = 131072\n","llama_model_loader: - kv   8:                     llama.embedding_length u32              = 4096\n","llama_model_loader: - kv   9:                  llama.feed_forward_length u32              = 14336\n","llama_model_loader: - kv  10:                 llama.attention.head_count u32              = 32\n","llama_model_loader: - kv  11:              llama.attention.head_count_kv u32              = 8\n","llama_model_loader: - kv  12:                       llama.rope.freq_base f32              = 500000.000000\n","llama_model_loader: - kv  13:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010\n","llama_model_loader: - kv  14:                 llama.attention.key_length u32              = 128\n","llama_model_loader: - kv  15:               llama.attention.value_length u32              = 128\n","llama_model_loader: - kv  16:                          general.file_type u32              = 1\n","llama_model_loader: - kv  17:                           llama.vocab_size u32              = 128258\n","llama_model_loader: - kv  18:                 llama.rope.dimension_count u32              = 128\n","llama_model_loader: - kv  19:                       tokenizer.ggml.model str              = gpt2\n","llama_model_loader: - kv  20:                         tokenizer.ggml.pre str              = llama-bpe\n","llama_model_loader: - kv  21:                      tokenizer.ggml.tokens arr[str,128258]  = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n","llama_model_loader: - kv  22:                  tokenizer.ggml.token_type arr[i32,128258]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n","llama_model_loader: - kv  23:                      tokenizer.ggml.merges arr[str,280147]  = [\"Ġ Ġ\", \"Ġ ĠĠĠ\", \"ĠĠ ĠĠ\", \"...\n","llama_model_loader: - kv  24:                tokenizer.ggml.bos_token_id u32              = 128256\n","llama_model_loader: - kv  25:                tokenizer.ggml.eos_token_id u32              = 128257\n","llama_model_loader: - kv  26:            tokenizer.ggml.padding_token_id u32              = 128257\n","llama_model_loader: - kv  27:                    tokenizer.chat_template str              = {% for message in messages %}{{'<|im_...\n","llama_model_loader: - kv  28:               general.quantization_version u32              = 2\n","llama_model_loader: - type  f32:   66 tensors\n","llama_model_loader: - type  f16:  226 tensors\n","[   1/ 292]                    rope_freqs.weight - [   64,     1,     1,     1], type =    f32, size =    0.000 MB\n","[   2/ 292]                    token_embd.weight - [ 4096, 128258,     1,     1], type =    f16, converting to q5_K .. size =  1002.02 MiB ->   344.44 MiB\n","[   3/ 292]               blk.0.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[   4/ 292]                blk.0.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[   5/ 292]                blk.0.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[   6/ 292]                  blk.0.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[   7/ 292]                blk.0.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[   8/ 292]                  blk.0.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[   9/ 292]             blk.0.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  10/ 292]                  blk.0.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  11/ 292]                  blk.0.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  12/ 292]               blk.1.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  13/ 292]                blk.1.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  14/ 292]                blk.1.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  15/ 292]                  blk.1.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  16/ 292]                blk.1.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  17/ 292]                  blk.1.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  18/ 292]             blk.1.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  19/ 292]                  blk.1.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  20/ 292]                  blk.1.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  21/ 292]               blk.2.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  22/ 292]                blk.2.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  23/ 292]                blk.2.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  24/ 292]                  blk.2.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  25/ 292]                blk.2.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  26/ 292]                  blk.2.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  27/ 292]             blk.2.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  28/ 292]                  blk.2.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  29/ 292]                  blk.2.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  30/ 292]               blk.3.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  31/ 292]                blk.3.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  32/ 292]                blk.3.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  33/ 292]                  blk.3.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  34/ 292]                blk.3.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  35/ 292]                  blk.3.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  36/ 292]             blk.3.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  37/ 292]                  blk.3.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  38/ 292]                  blk.3.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  39/ 292]               blk.4.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  40/ 292]                blk.4.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  41/ 292]                blk.4.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  42/ 292]                  blk.4.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  43/ 292]                blk.4.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  44/ 292]                  blk.4.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  45/ 292]             blk.4.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  46/ 292]                  blk.4.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  47/ 292]                  blk.4.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  48/ 292]               blk.5.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  49/ 292]                blk.5.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  50/ 292]                blk.5.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  51/ 292]                  blk.5.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  52/ 292]                blk.5.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  53/ 292]                  blk.5.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  54/ 292]             blk.5.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  55/ 292]                  blk.5.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  56/ 292]                  blk.5.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  57/ 292]               blk.6.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  58/ 292]                blk.6.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  59/ 292]                blk.6.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  60/ 292]                  blk.6.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  61/ 292]                blk.6.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  62/ 292]                  blk.6.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  63/ 292]             blk.6.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  64/ 292]                  blk.6.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  65/ 292]                  blk.6.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  66/ 292]               blk.7.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  67/ 292]                blk.7.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  68/ 292]                blk.7.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  69/ 292]                  blk.7.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  70/ 292]                blk.7.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  71/ 292]                  blk.7.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  72/ 292]             blk.7.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  73/ 292]                  blk.7.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  74/ 292]                  blk.7.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  75/ 292]               blk.8.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  76/ 292]                blk.8.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  77/ 292]                blk.8.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  78/ 292]                  blk.8.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  79/ 292]                blk.8.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  80/ 292]                  blk.8.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  81/ 292]             blk.8.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  82/ 292]                  blk.8.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  83/ 292]                  blk.8.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  84/ 292]              blk.10.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  85/ 292]               blk.10.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[  86/ 292]               blk.10.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  87/ 292]                 blk.10.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  88/ 292]               blk.10.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  89/ 292]                 blk.10.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  90/ 292]            blk.10.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  91/ 292]                 blk.10.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[  92/ 292]                 blk.10.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[  93/ 292]              blk.11.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  94/ 292]               blk.11.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  95/ 292]               blk.11.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  96/ 292]                 blk.11.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[  97/ 292]               blk.11.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[  98/ 292]                 blk.11.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[  99/ 292]            blk.11.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 100/ 292]                 blk.11.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 101/ 292]                 blk.11.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 102/ 292]              blk.12.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 103/ 292]               blk.12.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 104/ 292]               blk.12.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 105/ 292]                 blk.12.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 106/ 292]               blk.12.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 107/ 292]                 blk.12.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 108/ 292]            blk.12.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 109/ 292]                 blk.12.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 110/ 292]                 blk.12.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 111/ 292]              blk.13.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 112/ 292]               blk.13.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 113/ 292]               blk.13.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 114/ 292]                 blk.13.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 115/ 292]               blk.13.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 116/ 292]                 blk.13.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 117/ 292]            blk.13.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 118/ 292]                 blk.13.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 119/ 292]                 blk.13.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 120/ 292]              blk.14.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 121/ 292]               blk.14.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 122/ 292]               blk.14.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 123/ 292]                 blk.14.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 124/ 292]               blk.14.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 125/ 292]                 blk.14.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 126/ 292]            blk.14.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 127/ 292]                 blk.14.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 128/ 292]                 blk.14.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 129/ 292]              blk.15.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 130/ 292]               blk.15.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 131/ 292]               blk.15.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 132/ 292]                 blk.15.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 133/ 292]               blk.15.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 134/ 292]                 blk.15.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 135/ 292]            blk.15.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 136/ 292]                 blk.15.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 137/ 292]                 blk.15.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 138/ 292]              blk.16.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 139/ 292]               blk.16.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 140/ 292]               blk.16.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 141/ 292]                 blk.16.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 142/ 292]               blk.16.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 143/ 292]                 blk.16.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 144/ 292]            blk.16.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 145/ 292]                 blk.16.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 146/ 292]                 blk.16.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 147/ 292]              blk.17.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 148/ 292]               blk.17.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 149/ 292]               blk.17.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 150/ 292]                 blk.17.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 151/ 292]               blk.17.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 152/ 292]                 blk.17.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 153/ 292]            blk.17.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 154/ 292]                 blk.17.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 155/ 292]                 blk.17.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 156/ 292]              blk.18.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 157/ 292]               blk.18.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 158/ 292]               blk.18.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 159/ 292]                 blk.18.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 160/ 292]               blk.18.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 161/ 292]                 blk.18.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 162/ 292]            blk.18.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 163/ 292]                 blk.18.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 164/ 292]                 blk.18.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 165/ 292]              blk.19.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 166/ 292]               blk.19.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 167/ 292]               blk.19.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 168/ 292]                 blk.19.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 169/ 292]               blk.19.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 170/ 292]                 blk.19.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 171/ 292]            blk.19.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 172/ 292]                 blk.19.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 173/ 292]                 blk.19.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 174/ 292]               blk.20.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 175/ 292]                 blk.20.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 176/ 292]            blk.20.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 177/ 292]                 blk.20.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 178/ 292]                 blk.20.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 179/ 292]               blk.9.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 180/ 292]                blk.9.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 181/ 292]                blk.9.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 182/ 292]                  blk.9.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 183/ 292]                blk.9.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 184/ 292]                  blk.9.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 185/ 292]             blk.9.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 186/ 292]                  blk.9.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 187/ 292]                  blk.9.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 188/ 292]              blk.20.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 189/ 292]               blk.20.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 190/ 292]                 blk.20.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 191/ 292]               blk.20.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 192/ 292]              blk.21.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 193/ 292]               blk.21.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 194/ 292]               blk.21.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 195/ 292]                 blk.21.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 196/ 292]               blk.21.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 197/ 292]                 blk.21.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 198/ 292]            blk.21.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 199/ 292]                 blk.21.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 200/ 292]                 blk.21.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 201/ 292]              blk.22.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 202/ 292]               blk.22.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 203/ 292]               blk.22.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 204/ 292]                 blk.22.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 205/ 292]               blk.22.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 206/ 292]                 blk.22.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 207/ 292]            blk.22.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 208/ 292]                 blk.22.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 209/ 292]                 blk.22.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 210/ 292]              blk.23.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 211/ 292]               blk.23.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 212/ 292]               blk.23.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 213/ 292]                 blk.23.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 214/ 292]               blk.23.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 215/ 292]                 blk.23.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 216/ 292]            blk.23.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 217/ 292]                 blk.23.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 218/ 292]                 blk.23.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 219/ 292]              blk.24.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 220/ 292]               blk.24.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 221/ 292]               blk.24.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 222/ 292]                 blk.24.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 223/ 292]               blk.24.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 224/ 292]                 blk.24.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 225/ 292]            blk.24.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 226/ 292]                 blk.24.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 227/ 292]                 blk.24.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 228/ 292]              blk.25.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 229/ 292]               blk.25.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 230/ 292]               blk.25.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 231/ 292]                 blk.25.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 232/ 292]               blk.25.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 233/ 292]                 blk.25.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 234/ 292]            blk.25.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 235/ 292]                 blk.25.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 236/ 292]                 blk.25.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 237/ 292]              blk.26.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 238/ 292]               blk.26.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 239/ 292]               blk.26.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 240/ 292]                 blk.26.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 241/ 292]               blk.26.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 242/ 292]                 blk.26.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 243/ 292]            blk.26.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 244/ 292]                 blk.26.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 245/ 292]                 blk.26.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 246/ 292]              blk.27.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 247/ 292]               blk.27.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 248/ 292]               blk.27.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 249/ 292]                 blk.27.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 250/ 292]               blk.27.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 251/ 292]                 blk.27.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 252/ 292]            blk.27.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 253/ 292]                 blk.27.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 254/ 292]                 blk.27.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 255/ 292]              blk.28.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 256/ 292]               blk.28.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 257/ 292]               blk.28.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 258/ 292]                 blk.28.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 259/ 292]               blk.28.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 260/ 292]                 blk.28.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 261/ 292]            blk.28.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 262/ 292]                 blk.28.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 263/ 292]                 blk.28.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 264/ 292]              blk.29.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 265/ 292]               blk.29.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 266/ 292]               blk.29.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 267/ 292]                 blk.29.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 268/ 292]               blk.29.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 269/ 292]                 blk.29.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 270/ 292]            blk.29.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 271/ 292]                 blk.29.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 272/ 292]                 blk.29.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 273/ 292]              blk.30.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 274/ 292]               blk.30.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 275/ 292]               blk.30.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 276/ 292]                 blk.30.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 277/ 292]               blk.30.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 278/ 292]                 blk.30.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 279/ 292]            blk.30.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 280/ 292]                 blk.30.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 281/ 292]                 blk.30.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 282/ 292]               blk.31.ffn_gate.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 283/ 292]                 blk.31.ffn_up.weight - [ 4096, 14336,     1,     1], type =    f16, converting to q5_K .. size =   112.00 MiB ->    38.50 MiB\n","[ 284/ 292]                 blk.31.attn_k.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q5_K .. size =     8.00 MiB ->     2.75 MiB\n","[ 285/ 292]            blk.31.attn_output.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 286/ 292]                 blk.31.attn_q.weight - [ 4096,  4096,     1,     1], type =    f16, converting to q5_K .. size =    32.00 MiB ->    11.00 MiB\n","[ 287/ 292]                 blk.31.attn_v.weight - [ 4096,  1024,     1,     1], type =    f16, converting to q6_K .. size =     8.00 MiB ->     3.28 MiB\n","[ 288/ 292]                        output.weight - [ 4096, 128258,     1,     1], type =    f16, converting to q6_K .. size =  1002.02 MiB ->   410.98 MiB\n","[ 289/ 292]              blk.31.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 290/ 292]               blk.31.ffn_down.weight - [14336,  4096,     1,     1], type =    f16, converting to q6_K .. size =   112.00 MiB ->    45.94 MiB\n","[ 291/ 292]               blk.31.ffn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","[ 292/ 292]                   output_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MB\n","llama_model_quantize_internal: model size  = 15317.05 MB\n","llama_model_quantize_internal: quant size  =  5459.94 MB\n","\n","main: quantize time = 431584.61 ms\n","main:    total time = 431584.61 ms\n"]}],"source":["%cd /kaggle/working/\n","\n","!./llama.cpp/llama-quantize /kaggle/input/llama-3-llm-to-gguf/llama-3.1-8b-chat-math-teacher.gguf llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf Q5_K_M"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-10-21T20:08:36.315241Z","iopub.status.busy":"2024-10-21T20:08:36.314900Z","iopub.status.idle":"2024-10-21T20:11:08.853556Z","shell.execute_reply":"2024-10-21T20:11:08.852606Z","shell.execute_reply.started":"2024-10-21T20:08:36.315205Z"},"trusted":true},"outputs":[],"source":["from huggingface_hub import login\n","from kaggle_secrets import UserSecretsClient\n","from huggingface_hub import HfApi\n","user_secrets = UserSecretsClient()\n","hf_token = user_secrets.get_secret(\"HF_TOKEN\")\n","login(token = hf_token)\n","\n","api = HfApi()\n","api.upload_file(\n","    path_or_fileobj=\"/kaggle/working/llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf\",\n","    path_in_repo=\"llama-3.1-8b-chat-math-teacher-Q5_K_M.gguf\",\n","    repo_id=\"ccapo/llama-3.1-8b-chat-math-teacher-GGUF\",\n","    repo_type=\"model\",\n",")"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.14"}},"nbformat":4,"nbformat_minor":4}