{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "id": "hS2zWviCGv-j" }, "outputs": [], "source": [ "# Hugging Face repo id of the checkpoint to download.\n", "model_name_or_path = \"mistralai/Mixtral-8x7B-Instruct-v0.1\"#@param {type:\"string\"}\n", "\n", "# Working folder on Drive; the download cell changes into it.\n", "temp_dir = \"/content/drive/MyDrive/tf_models\" #@param {type:\"string\"}\n", "# Last path component of the repo id, and the matching folder under temp_dir.\n", "model_name = model_name_or_path.rsplit(\"/\", 1)[-1]\n", "target_dir = \"/\".join((temp_dir, model_name))\n", "# Folder where the converted shards are saved.\n", "save_dir = \"/content/drive/MyDrive/tf_models/mixtral-4x7b_slerp\" #@param {type:\"string\"}\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fS0Z2JFPG3w1", "outputId": "b83290b5-40da-47f8-c5cc-6d94a6c710d4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.2)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.1.0+cu121)\n", "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (0.4.1)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.4)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in 
/usr/local/lib/python3.10/dist-packages (from transformers) (0.15.0)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n", "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.1.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.11.17)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n" ] } ], "source": [ "# %pip installs into the environment of the running kernel.\n", "%pip install transformers torch safetensors" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "WwnZPGHATsqv" }, "outputs": [], "source": [ "# Download into a per-model folder under temp_dir.\n", "%cd {temp_dir}\n", "# model_name is already the last path component of the repo id.\n", "save_model_dir = model_name\n", "!mkdir -p {save_model_dir}\n", "\n", "!wget 
https://huggingface.co/{model_name_or_path}/resolve/main/config.json -O {save_model_dir}/config.json\n", "!wget https://huggingface.co/{model_name_or_path}/resolve/main/model.safetensors.index.json -O {save_model_dir}/model.safetensors.index.json\n", "!wget https://huggingface.co/{model_name_or_path}/resolve/main/generation_config.json -O {save_model_dir}/generation_config.json\n", "\n", "# Weight shards are named model-XXXXX-of-YYYYY.safetensors (1-based, zero-padded to 5 digits).\n", "num_shards = 19\n", "shard_total_str = str(num_shards).zfill(5)\n", "for i in range(1, num_shards + 1):\n", "    file_count_str = str(i).zfill(5)\n", "    # The URL is quoted because an unquoted '?' is a shell glob character.\n", "    !wget 'https://huggingface.co/{model_name_or_path}/resolve/main/model-{file_count_str}-of-{shard_total_str}.safetensors?download=true' -O {save_model_dir}/model-{file_count_str}-of-{shard_total_str}.safetensors" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "GpHX5HoDPCEM", "outputId": "a9b327d2-b1b4-465b-e43f-4b684357f138" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/content/drive/MyDrive/tf_models\n", "starting layer: 0\n", "Loading Tensors model-00001-of-00019.safetensors\n", "stock model.layers.0.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.0.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.0.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.0.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.0.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.0.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.0.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.0.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.0.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.0.block_sparse_moe.experts.2.w3.weight 
torch.Size([14336, 4096])\n", "new experts model.layers.0.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.0.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.0.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.0.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.0.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.0.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.0.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.0.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.0.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.0.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.0.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.0.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.0.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.0.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.0.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.0.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.0.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.0.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) 
model.layers.0.block_sparse_moe.gate.weight\n", "model.layers.0.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.0.input_layernorm.weight torch.Size([4096])\n", "model.layers.0.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.0.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.0.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.0.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.0.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00001.safetensors\n", "starting layer: 1\n", "Loading Tensors model-00001-of-00019.safetensors\n", "stock model.layers.1.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.1.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.1.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.1.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.1.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.1.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.1.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.1.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.1.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.1.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.1.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.1.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.1.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.1.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.1.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.1.block_sparse_moe.experts.3.w2.weight\n", "new 
experts model.layers.1.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.1.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.1.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.1.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "Loading Tensors model-00002-of-00019.safetensors\n", "stock model.layers.1.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.1.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.1.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.1.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.1.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.1.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.1.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.1.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.1.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.1.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.1.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.1.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.1.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.1.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.1.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.1.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00001-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.1.block_sparse_moe.gate.weight\n", "model.layers.1.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00002-of-00019.safetensors\n", "model.layers.1.input_layernorm.weight torch.Size([4096])\n", 
"model.layers.1.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00001-of-00019.safetensors\n", "model.layers.1.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.1.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.1.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.1.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00002.safetensors\n", "starting layer: 2\n", "Loading Tensors model-00002-of-00019.safetensors\n", "stock model.layers.2.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.2.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.2.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.2.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.2.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.2.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.2.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.2.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.2.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.2.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.2.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.2.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.2.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.2.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.2.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.2.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.2.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from 
model.layers.2.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.2.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.2.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.2.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.2.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.2.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.2.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.2.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.2.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.2.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.2.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.2.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.2.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.2.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.2.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.2.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.2.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.2.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.2.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.2.block_sparse_moe.gate.weight\n", "model.layers.2.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.2.input_layernorm.weight torch.Size([4096])\n", "model.layers.2.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.2.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.2.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.2.self_attn.q_proj.weight 
torch.Size([4096, 4096])\n", "model.layers.2.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00003.safetensors\n", "starting layer: 3\n", "Loading Tensors model-00002-of-00019.safetensors\n", "stock model.layers.3.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.3.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.3.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.3.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.3.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.3.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.3.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.3.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.3.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.3.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "Loading Tensors model-00003-of-00019.safetensors\n", "stock model.layers.3.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.3.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.3.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.3.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.3.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.3.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.3.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.3.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.3.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.3.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock 
model.layers.3.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.3.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.3.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.3.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.3.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.3.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.3.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.3.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.3.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.3.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.3.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.3.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.3.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.3.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.3.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.3.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00002-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.3.block_sparse_moe.gate.weight\n", "model.layers.3.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00003-of-00019.safetensors\n", "model.layers.3.input_layernorm.weight torch.Size([4096])\n", "model.layers.3.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00002-of-00019.safetensors\n", "model.layers.3.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.3.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.3.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.3.self_attn.v_proj.weight 
torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00004.safetensors\n", "starting layer: 4\n", "Loading Tensors model-00003-of-00019.safetensors\n", "stock model.layers.4.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.4.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.4.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.4.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.4.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.4.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.4.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.4.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.4.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.4.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.4.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.4.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.4.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.4.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.4.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.4.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.4.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.4.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.4.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.4.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.4.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.4.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 
4096]) from model.layers.4.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.4.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.4.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.4.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.4.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.4.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.4.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.4.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.4.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.4.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.4.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.4.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.4.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.4.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.4.block_sparse_moe.gate.weight\n", "model.layers.4.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.4.input_layernorm.weight torch.Size([4096])\n", "model.layers.4.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.4.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.4.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.4.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.4.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00005.safetensors\n", "starting layer: 5\n", "Loading Tensors model-00004-of-00019.safetensors\n", "stock model.layers.5.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock 
model.layers.5.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.5.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.5.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.5.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.5.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.5.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.5.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.5.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.5.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.5.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.5.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.5.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.5.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.5.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.5.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.5.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.5.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.5.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.5.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.5.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.5.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.5.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.5.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.5.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.5.block_sparse_moe.experts.2.w3.weight 
torch.Size([14336, 4096]) from model.layers.5.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.5.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.5.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.5.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.5.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.5.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.5.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.5.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.5.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.5.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00003-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.5.block_sparse_moe.gate.weight\n", "model.layers.5.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00004-of-00019.safetensors\n", "model.layers.5.input_layernorm.weight torch.Size([4096])\n", "model.layers.5.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00003-of-00019.safetensors\n", "model.layers.5.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.5.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.5.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.5.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00006.safetensors\n", "starting layer: 6\n", "Loading Tensors model-00004-of-00019.safetensors\n", "stock model.layers.6.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.6.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.6.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 
4096])\n", "new experts model.layers.6.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.6.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.6.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.6.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.6.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.6.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.6.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.6.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.6.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.6.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.6.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.6.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.6.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.6.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.6.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.6.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.6.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.6.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.6.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.6.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.6.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.6.block_sparse_moe.experts.5.w2.weight\n", "Loading Tensors model-00005-of-00019.safetensors\n", "new experts model.layers.6.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.6.block_sparse_moe.experts.5.w3.weight\n", "stock 
model.layers.6.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.6.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.6.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.6.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.6.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.6.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.6.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.6.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.6.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00004-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.6.block_sparse_moe.gate.weight\n", "model.layers.6.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00005-of-00019.safetensors\n", "model.layers.6.input_layernorm.weight torch.Size([4096])\n", "model.layers.6.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00004-of-00019.safetensors\n", "model.layers.6.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.6.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.6.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.6.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00007.safetensors\n", "starting layer: 7\n", "Loading Tensors model-00005-of-00019.safetensors\n", "stock model.layers.7.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.7.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.7.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.7.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) 
from model.layers.7.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.7.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.7.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.7.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.7.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.7.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.7.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.7.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.7.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.7.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.7.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.7.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.7.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.7.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.7.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.7.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.7.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.7.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.7.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.7.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.7.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.7.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.7.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.7.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.7.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock 
model.layers.7.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.7.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.7.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.7.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.7.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.7.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.7.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.7.block_sparse_moe.gate.weight\n", "model.layers.7.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.7.input_layernorm.weight torch.Size([4096])\n", "model.layers.7.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.7.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.7.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.7.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.7.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00008.safetensors\n", "starting layer: 8\n", "Loading Tensors model-00005-of-00019.safetensors\n", "stock model.layers.8.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.8.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.8.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.8.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.8.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.8.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.8.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.8.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from 
model.layers.8.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.8.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.8.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.8.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.8.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.8.block_sparse_moe.experts.3.w1.weight\n", "Loading Tensors model-00006-of-00019.safetensors\n", "new experts model.layers.8.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.8.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.8.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.8.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.8.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.8.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.8.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.8.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.8.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.8.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.8.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.8.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.8.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.8.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.8.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.8.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.8.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.8.block_sparse_moe.experts.7.w1.weight\n", "new experts 
model.layers.8.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.8.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.8.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.8.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00005-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.8.block_sparse_moe.gate.weight\n", "model.layers.8.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00006-of-00019.safetensors\n", "model.layers.8.input_layernorm.weight torch.Size([4096])\n", "model.layers.8.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00005-of-00019.safetensors\n", "model.layers.8.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.8.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.8.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.8.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00009.safetensors\n", "starting layer: 9\n", "Loading Tensors model-00006-of-00019.safetensors\n", "stock model.layers.9.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.9.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.9.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.9.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.9.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.9.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.9.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.9.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.9.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.9.block_sparse_moe.experts.2.w1.weight 
torch.Size([14336, 4096])\n", "stock model.layers.9.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.9.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.9.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.9.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.9.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.9.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.9.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.9.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.9.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.9.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.9.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.9.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.9.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.9.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.9.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.9.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.9.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.9.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.9.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.9.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.9.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.9.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.9.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.9.block_sparse_moe.experts.7.w2.weight\n", "new experts 
model.layers.9.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.9.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.9.block_sparse_moe.gate.weight\n", "model.layers.9.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.9.input_layernorm.weight torch.Size([4096])\n", "model.layers.9.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.9.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.9.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.9.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.9.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00010.safetensors\n", "starting layer: 10\n", "Loading Tensors model-00006-of-00019.safetensors\n", "stock model.layers.10.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.10.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.10.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "Loading Tensors model-00007-of-00019.safetensors\n", "new experts model.layers.10.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.10.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.10.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.10.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.10.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.10.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.10.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.10.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.10.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts 
model.layers.10.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.10.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.10.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.10.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.10.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.10.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.10.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.10.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.10.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.10.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.10.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.10.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.10.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.10.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.10.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.10.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.10.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.10.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.10.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.10.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.10.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.10.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.10.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.10.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00006-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) 
-> view(4, 2, 4096) -> (4, 4098) model.layers.10.block_sparse_moe.gate.weight\n", "model.layers.10.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00007-of-00019.safetensors\n", "model.layers.10.input_layernorm.weight torch.Size([4096])\n", "model.layers.10.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00006-of-00019.safetensors\n", "model.layers.10.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.10.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.10.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.10.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00011.safetensors\n", "starting layer: 11\n", "Loading Tensors model-00007-of-00019.safetensors\n", "stock model.layers.11.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.11.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.11.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.11.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.11.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.11.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.11.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.11.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.11.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.11.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.11.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.11.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.11.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from 
model.layers.11.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.11.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.11.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.11.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.11.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.11.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.11.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.11.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.11.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.11.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.11.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.11.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.11.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.11.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.11.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.11.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "Loading Tensors model-00008-of-00019.safetensors\n", "stock model.layers.11.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.11.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.11.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.11.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.11.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.11.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.11.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00007-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 
4098) model.layers.11.block_sparse_moe.gate.weight\n", "model.layers.11.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00008-of-00019.safetensors\n", "model.layers.11.input_layernorm.weight torch.Size([4096])\n", "model.layers.11.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00007-of-00019.safetensors\n", "model.layers.11.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.11.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.11.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.11.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00012.safetensors\n", "starting layer: 12\n", "Loading Tensors model-00008-of-00019.safetensors\n", "stock model.layers.12.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.12.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.12.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.12.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.12.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.12.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.12.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.12.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.12.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.12.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.12.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.12.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.12.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.12.block_sparse_moe.experts.3.w1.weight\n", "new experts 
model.layers.12.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.12.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.12.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.12.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.12.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.12.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.12.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.12.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.12.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.12.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.12.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.12.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.12.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.12.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.12.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.12.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.12.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.12.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.12.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.12.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.12.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.12.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.12.block_sparse_moe.gate.weight\n", "model.layers.12.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.12.input_layernorm.weight 
torch.Size([4096])\n", "model.layers.12.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.12.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.12.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.12.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.12.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00013.safetensors\n", "starting layer: 13\n", "Loading Tensors model-00008-of-00019.safetensors\n", "stock model.layers.13.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.13.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.13.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.13.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.13.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.13.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.13.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.13.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.13.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.13.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.13.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.13.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.13.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.13.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.13.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.13.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.13.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from 
model.layers.13.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.13.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "Loading Tensors model-00009-of-00019.safetensors\n", "stock model.layers.13.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.13.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.13.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.13.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.13.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.13.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.13.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.13.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.13.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.13.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.13.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.13.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.13.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.13.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.13.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.13.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.13.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00008-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.13.block_sparse_moe.gate.weight\n", "model.layers.13.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00009-of-00019.safetensors\n", "model.layers.13.input_layernorm.weight torch.Size([4096])\n", "model.layers.13.post_attention_layernorm.weight 
torch.Size([4096])\n", "Loading Tensors model-00008-of-00019.safetensors\n", "model.layers.13.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.13.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.13.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.13.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00014.safetensors\n", "starting layer: 14\n", "Loading Tensors model-00009-of-00019.safetensors\n", "stock model.layers.14.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.14.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.14.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.14.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.14.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.14.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.14.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.14.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.14.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.14.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.14.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.14.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.14.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.14.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.14.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.14.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.14.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.14.block_sparse_moe.experts.3.w3.weight\n", "stock 
model.layers.14.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.14.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.14.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.14.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.14.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.14.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.14.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.14.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.14.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.14.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.14.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.14.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.14.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.14.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.14.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.14.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.14.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.14.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.14.block_sparse_moe.gate.weight\n", "model.layers.14.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.14.input_layernorm.weight torch.Size([4096])\n", "model.layers.14.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.14.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.14.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.14.self_attn.q_proj.weight torch.Size([4096, 4096])\n", 
"model.layers.14.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00015.safetensors\n", "starting layer: 15\n", "Loading Tensors model-00009-of-00019.safetensors\n", "stock model.layers.15.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.15.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.15.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.15.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.15.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.15.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.15.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.15.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.15.block_sparse_moe.experts.1.w3.weight\n", "Loading Tensors model-00010-of-00019.safetensors\n", "stock model.layers.15.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.15.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.15.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.15.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.15.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.15.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.15.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.15.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.15.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.15.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.15.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.15.block_sparse_moe.experts.4.w3.weight 
torch.Size([14336, 4096])\n", "new experts model.layers.15.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.15.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.15.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.15.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.15.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.15.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.15.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.15.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.15.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.15.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.15.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.15.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.15.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.15.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.15.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00009-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.15.block_sparse_moe.gate.weight\n", "model.layers.15.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00010-of-00019.safetensors\n", "model.layers.15.input_layernorm.weight torch.Size([4096])\n", "model.layers.15.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00009-of-00019.safetensors\n", "model.layers.15.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.15.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.15.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.15.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors 
/content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00016.safetensors\n", "starting layer: 16\n", "Loading Tensors model-00010-of-00019.safetensors\n", "stock model.layers.16.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.16.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.16.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.16.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.16.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.16.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.16.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.16.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.16.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.16.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.16.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.16.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.16.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.16.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.16.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.16.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.16.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.16.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.16.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.16.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.16.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.16.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from 
model.layers.16.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.16.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.16.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.16.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.16.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.16.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.16.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.16.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.16.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.16.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.16.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.16.block_sparse_moe.experts.7.w2.weight\n", "Loading Tensors model-00011-of-00019.safetensors\n", "new experts model.layers.16.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.16.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00010-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.16.block_sparse_moe.gate.weight\n", "model.layers.16.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00011-of-00019.safetensors\n", "model.layers.16.input_layernorm.weight torch.Size([4096])\n", "model.layers.16.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00010-of-00019.safetensors\n", "model.layers.16.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.16.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.16.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.16.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors 
/content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00017.safetensors\n", "starting layer: 17\n", "Loading Tensors model-00011-of-00019.safetensors\n", "stock model.layers.17.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.17.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.17.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.17.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.17.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.17.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.17.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.17.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.17.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.17.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.17.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.17.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.17.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.17.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.17.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.17.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.17.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.17.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.17.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.17.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.17.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.17.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from 
model.layers.17.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.17.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.17.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.17.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.17.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.17.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.17.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.17.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.17.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.17.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.17.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.17.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.17.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.17.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.17.block_sparse_moe.gate.weight\n", "model.layers.17.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.17.input_layernorm.weight torch.Size([4096])\n", "model.layers.17.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.17.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.17.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.17.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.17.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00018.safetensors\n", "starting layer: 18\n", "Loading Tensors model-00011-of-00019.safetensors\n", "stock model.layers.18.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock 
model.layers.18.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.18.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.18.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.18.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.18.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.18.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.18.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.18.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.18.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.18.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.18.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.18.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.18.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.18.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.18.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.18.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.18.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.18.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.18.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.18.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.18.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.18.block_sparse_moe.experts.5.w1.weight\n", "Loading Tensors model-00012-of-00019.safetensors\n", "new experts model.layers.18.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.18.block_sparse_moe.experts.5.w2.weight\n", 
"new experts model.layers.18.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.18.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.18.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.18.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.18.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.18.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.18.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.18.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.18.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.18.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.18.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00011-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.18.block_sparse_moe.gate.weight\n", "model.layers.18.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00012-of-00019.safetensors\n", "model.layers.18.input_layernorm.weight torch.Size([4096])\n", "model.layers.18.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00011-of-00019.safetensors\n", "model.layers.18.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.18.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.18.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.18.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00019.safetensors\n", "starting layer: 19\n", "Loading Tensors model-00012-of-00019.safetensors\n", "stock model.layers.19.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.19.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", 
"stock model.layers.19.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.19.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.19.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.19.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.19.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.19.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.19.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.19.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.19.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.19.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.19.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.19.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.19.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.19.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.19.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.19.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.19.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.19.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.19.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.19.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.19.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.19.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.19.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.19.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from 
model.layers.19.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.19.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.19.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.19.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.19.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.19.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.19.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.19.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.19.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.19.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.19.block_sparse_moe.gate.weight\n", "model.layers.19.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.19.input_layernorm.weight torch.Size([4096])\n", "model.layers.19.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.19.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.19.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.19.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.19.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00020.safetensors\n", "starting layer: 20\n", "Loading Tensors model-00012-of-00019.safetensors\n", "stock model.layers.20.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.20.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.20.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.20.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.20.block_sparse_moe.experts.1.w1.weight\n", "new experts 
model.layers.20.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.20.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.20.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.20.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.20.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.20.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.20.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "Loading Tensors model-00013-of-00019.safetensors\n", "new experts model.layers.20.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.20.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.20.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.20.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.20.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.20.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.20.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.20.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.20.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.20.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.20.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.20.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.20.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.20.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.20.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.20.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.20.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock 
model.layers.20.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.20.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.20.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.20.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.20.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.20.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.20.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00012-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.20.block_sparse_moe.gate.weight\n", "model.layers.20.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00013-of-00019.safetensors\n", "model.layers.20.input_layernorm.weight torch.Size([4096])\n", "model.layers.20.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00012-of-00019.safetensors\n", "model.layers.20.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.20.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.20.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.20.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00021.safetensors\n", "starting layer: 21\n", "Loading Tensors model-00013-of-00019.safetensors\n", "stock model.layers.21.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.21.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.21.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.21.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.21.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.21.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from 
model.layers.21.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.21.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.21.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.21.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.21.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.21.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.21.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.21.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.21.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.21.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.21.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.21.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.21.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.21.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.21.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.21.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.21.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.21.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.21.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.21.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.21.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.21.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.21.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.21.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts 
model.layers.21.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.21.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.21.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.21.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.21.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.21.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.21.block_sparse_moe.gate.weight\n", "model.layers.21.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.21.input_layernorm.weight torch.Size([4096])\n", "model.layers.21.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.21.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.21.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.21.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.21.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00022.safetensors\n", "starting layer: 22\n", "Loading Tensors model-00013-of-00019.safetensors\n", "stock model.layers.22.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.22.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "Loading Tensors model-00014-of-00019.safetensors\n", "stock model.layers.22.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.22.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.22.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.22.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.22.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.22.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from 
model.layers.22.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.22.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.22.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.22.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.22.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.22.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.22.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.22.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.22.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.22.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.22.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.22.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.22.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.22.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.22.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.22.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.22.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.22.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.22.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.22.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.22.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.22.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.22.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.22.block_sparse_moe.experts.7.w1.weight\n", "new experts 
model.layers.22.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.22.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.22.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.22.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00013-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.22.block_sparse_moe.gate.weight\n", "model.layers.22.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00014-of-00019.safetensors\n", "model.layers.22.input_layernorm.weight torch.Size([4096])\n", "model.layers.22.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00013-of-00019.safetensors\n", "model.layers.22.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.22.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.22.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.22.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00023.safetensors\n", "starting layer: 23\n", "Loading Tensors model-00014-of-00019.safetensors\n", "stock model.layers.23.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.23.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.23.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.23.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.23.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.23.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.23.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.23.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.23.block_sparse_moe.experts.1.w3.weight\n", "stock 
model.layers.23.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.23.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.23.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.23.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.23.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.23.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.23.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.23.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.23.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.23.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.23.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.23.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.23.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.23.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.23.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.23.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.23.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.23.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.23.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "Loading Tensors model-00015-of-00019.safetensors\n", "stock model.layers.23.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.23.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.23.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.23.block_sparse_moe.experts.7.w1.weight\n", "new experts 
model.layers.23.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.23.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.23.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.23.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00014-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.23.block_sparse_moe.gate.weight\n", "model.layers.23.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00015-of-00019.safetensors\n", "model.layers.23.input_layernorm.weight torch.Size([4096])\n", "model.layers.23.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00014-of-00019.safetensors\n", "model.layers.23.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.23.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.23.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.23.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00024.safetensors\n", "starting layer: 24\n", "Loading Tensors model-00015-of-00019.safetensors\n", "stock model.layers.24.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.24.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.24.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.24.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.24.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.24.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.24.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.24.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.24.block_sparse_moe.experts.1.w3.weight\n", "stock 
model.layers.24.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.24.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.24.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.24.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.24.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.24.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.24.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.24.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.24.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.24.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.24.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.24.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.24.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.24.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.24.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.24.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.24.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.24.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.24.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.24.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.24.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.24.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.24.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.24.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from 
model.layers.24.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.24.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.24.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.24.block_sparse_moe.gate.weight\n", "model.layers.24.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.24.input_layernorm.weight torch.Size([4096])\n", "model.layers.24.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.24.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.24.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.24.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.24.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00025.safetensors\n", "starting layer: 25\n", "Loading Tensors model-00015-of-00019.safetensors\n", "stock model.layers.25.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.25.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.25.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.25.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.25.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.25.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.25.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.25.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.25.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.25.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.25.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.25.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts 
model.layers.25.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.25.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.25.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.25.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.25.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.25.block_sparse_moe.experts.3.w3.weight\n", "Loading Tensors model-00016-of-00019.safetensors\n", "stock model.layers.25.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.25.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.25.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.25.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.25.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.25.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.25.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.25.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.25.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.25.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.25.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.25.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.25.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.25.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.25.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.25.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.25.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.25.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors 
model-00015-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.25.block_sparse_moe.gate.weight\n", "model.layers.25.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00016-of-00019.safetensors\n", "model.layers.25.input_layernorm.weight torch.Size([4096])\n", "model.layers.25.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00015-of-00019.safetensors\n", "model.layers.25.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.25.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.25.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.25.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00026.safetensors\n", "starting layer: 26\n", "Loading Tensors model-00016-of-00019.safetensors\n", "stock model.layers.26.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.26.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.26.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.26.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.26.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.26.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.26.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.26.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.26.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.26.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.26.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.26.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.26.block_sparse_moe.experts.1.w1.weight 
torch.Size([14336, 4096]) from model.layers.26.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.26.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.26.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.26.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.26.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.26.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.26.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.26.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.26.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.26.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.26.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.26.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.26.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.26.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.26.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.26.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.26.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.26.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.26.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.26.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.26.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.26.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.26.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.26.block_sparse_moe.gate.weight\n", 
"model.layers.26.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.26.input_layernorm.weight torch.Size([4096])\n", "model.layers.26.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.26.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.26.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.26.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.26.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00027.safetensors\n", "starting layer: 27\n", "Loading Tensors model-00016-of-00019.safetensors\n", "stock model.layers.27.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.27.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.27.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.27.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.27.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.27.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.27.block_sparse_moe.experts.1.w2.weight\n", "Loading Tensors model-00017-of-00019.safetensors\n", "new experts model.layers.27.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.27.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.27.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.27.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.27.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.27.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.27.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.27.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from 
model.layers.27.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.27.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.27.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.27.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.27.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.27.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.27.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.27.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.27.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.27.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.27.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.27.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.27.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.27.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.27.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.27.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.27.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.27.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.27.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.27.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.27.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00016-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.27.block_sparse_moe.gate.weight\n", "model.layers.27.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00017-of-00019.safetensors\n", 
"model.layers.27.input_layernorm.weight torch.Size([4096])\n", "model.layers.27.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00016-of-00019.safetensors\n", "model.layers.27.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.27.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.27.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.27.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00028.safetensors\n", "starting layer: 28\n", "Loading Tensors model-00017-of-00019.safetensors\n", "stock model.layers.28.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.28.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.28.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.28.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.28.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.28.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.28.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.28.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.28.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.28.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.28.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.28.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.28.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.28.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.28.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.28.block_sparse_moe.experts.3.w2.weight\n", "new experts 
model.layers.28.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.28.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.28.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.28.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.28.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.28.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.28.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.28.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.28.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.28.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.28.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.28.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.28.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.28.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.28.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.28.block_sparse_moe.experts.7.w1.weight\n", "Loading Tensors model-00018-of-00019.safetensors\n", "new experts model.layers.28.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.28.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.28.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.28.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00017-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.28.block_sparse_moe.gate.weight\n", "model.layers.28.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00018-of-00019.safetensors\n", "model.layers.28.input_layernorm.weight 
torch.Size([4096])\n", "model.layers.28.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00017-of-00019.safetensors\n", "model.layers.28.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.28.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.28.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.28.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00029.safetensors\n", "starting layer: 29\n", "Loading Tensors model-00018-of-00019.safetensors\n", "stock model.layers.29.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.29.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.29.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.29.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.29.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.29.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.29.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.29.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.29.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.29.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.29.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.29.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.29.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.29.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.29.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.29.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.29.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 
4096]) from model.layers.29.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.29.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.29.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.29.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.29.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.29.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.29.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.29.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.29.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.29.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.29.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.29.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.29.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.29.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.29.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.29.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.29.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.29.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.29.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.29.block_sparse_moe.gate.weight\n", "model.layers.29.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.29.input_layernorm.weight torch.Size([4096])\n", "model.layers.29.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.29.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.29.self_attn.o_proj.weight torch.Size([4096, 4096])\n", 
"model.layers.29.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.29.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00030.safetensors\n", "starting layer: 30\n", "Loading Tensors model-00018-of-00019.safetensors\n", "stock model.layers.30.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.30.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.30.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.30.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.30.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.30.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.30.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.30.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.30.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.30.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.30.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.30.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.30.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.30.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.30.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.30.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.30.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.30.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.30.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.30.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock 
model.layers.30.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 4096])\n", "Loading Tensors model-00019-of-00019.safetensors\n", "new experts model.layers.30.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.30.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.30.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.30.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.30.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.30.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.30.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.30.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.30.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.30.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.30.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.30.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.30.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.30.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.30.block_sparse_moe.experts.7.w3.weight\n", "Loading Tensors model-00018-of-00019.safetensors\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.30.block_sparse_moe.gate.weight\n", "model.layers.30.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "Loading Tensors model-00019-of-00019.safetensors\n", "model.layers.30.input_layernorm.weight torch.Size([4096])\n", "model.layers.30.post_attention_layernorm.weight torch.Size([4096])\n", "Loading Tensors model-00018-of-00019.safetensors\n", "model.layers.30.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.30.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.30.self_attn.q_proj.weight 
torch.Size([4096, 4096])\n", "model.layers.30.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00031.safetensors\n", "starting layer: 31\n", "Loading Tensors model-00019-of-00019.safetensors\n", "stock model.layers.31.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.31.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.31.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.31.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.31.block_sparse_moe.experts.1.w1.weight\n", "new experts model.layers.31.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.31.block_sparse_moe.experts.1.w2.weight\n", "new experts model.layers.31.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.31.block_sparse_moe.experts.1.w3.weight\n", "stock model.layers.31.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.31.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.31.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.31.block_sparse_moe.experts.1.w1.weight torch.Size([14336, 4096]) from model.layers.31.block_sparse_moe.experts.3.w1.weight\n", "new experts model.layers.31.block_sparse_moe.experts.1.w2.weight torch.Size([4096, 14336]) from model.layers.31.block_sparse_moe.experts.3.w2.weight\n", "new experts model.layers.31.block_sparse_moe.experts.1.w3.weight torch.Size([14336, 4096]) from model.layers.31.block_sparse_moe.experts.3.w3.weight\n", "stock model.layers.31.block_sparse_moe.experts.4.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.31.block_sparse_moe.experts.4.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.31.block_sparse_moe.experts.4.w3.weight torch.Size([14336, 
4096])\n", "new experts model.layers.31.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096]) from model.layers.31.block_sparse_moe.experts.5.w1.weight\n", "new experts model.layers.31.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336]) from model.layers.31.block_sparse_moe.experts.5.w2.weight\n", "new experts model.layers.31.block_sparse_moe.experts.2.w3.weight torch.Size([14336, 4096]) from model.layers.31.block_sparse_moe.experts.5.w3.weight\n", "stock model.layers.31.block_sparse_moe.experts.6.w1.weight torch.Size([14336, 4096])\n", "stock model.layers.31.block_sparse_moe.experts.6.w2.weight torch.Size([4096, 14336])\n", "stock model.layers.31.block_sparse_moe.experts.6.w3.weight torch.Size([14336, 4096])\n", "new experts model.layers.31.block_sparse_moe.experts.3.w1.weight torch.Size([14336, 4096]) from model.layers.31.block_sparse_moe.experts.7.w1.weight\n", "new experts model.layers.31.block_sparse_moe.experts.3.w2.weight torch.Size([4096, 14336]) from model.layers.31.block_sparse_moe.experts.7.w2.weight\n", "new experts model.layers.31.block_sparse_moe.experts.3.w3.weight torch.Size([14336, 4096]) from model.layers.31.block_sparse_moe.experts.7.w3.weight\n", "reshape torch.Size([8, 4096]) -> view(4, 2, 4096) -> (4, 4098) model.layers.31.block_sparse_moe.gate.weight\n", "model.layers.31.block_sparse_moe.gate.weight torch.Size([4, 4096])\n", "model.layers.31.input_layernorm.weight torch.Size([4096])\n", "model.layers.31.post_attention_layernorm.weight torch.Size([4096])\n", "model.layers.31.self_attn.k_proj.weight torch.Size([1024, 4096])\n", "model.layers.31.self_attn.o_proj.weight torch.Size([4096, 4096])\n", "model.layers.31.self_attn.q_proj.weight torch.Size([4096, 4096])\n", "model.layers.31.self_attn.v_proj.weight torch.Size([1024, 4096])\n", "Save Tensors /content/drive/MyDrive/tf_models/mixtral-4x7b_slerp/model-00032.safetensors\n", "Done.\n" ] } ], "source": [ "%cd {temp_dir}\n", "\n", "import json\n", "import re\n", "import 
torch\n", "from safetensors import safe_open\n", "from safetensors.torch import save_file\n", "\n", "# model-00001-of-00019.safetensors\n", "# model.safetensors.index.json\n", "\n", "# save tokenizer\n", "from transformers import AutoTokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)\n", "tokenizer.save_pretrained(save_dir)\n", "\n", "# load the source config and set the expert count to 4\n", "config_path = f\"{target_dir}/config.json\"\n", "config = None\n", "with open(config_path, \"r\") as f:\n", " config = json.load(f)\n", " config[\"num_experts_per_tok\"] = 2\n", " config[\"num_local_experts\"] = 4\n", "\n", "# save config\n", "with open(f\"{save_dir}/config.json\", \"w\") as f:\n", " json.dump(config, f, indent=2)\n", "\n", "\n", "# weight\n", "weight_map = {}\n", "first_weights = [\"lm_head.weight\", \"model.embed_tokens.weight\", \"model.norm.weight\"]\n", "\n", "# load weight map\n", "bin_index_path = f\"{target_dir}/model.safetensors.index.json\"\n", "with open(bin_index_path, \"r\") as f:\n", " weight_map = json.load(f)[\"weight_map\"]\n", "\n", "def tensor_load(file_name, map_location=None):\n", " tensors = {}\n", " with safe_open(file_name, framework=\"pt\") as f:\n", " for k in f.keys():\n", " tensors[k] = f.get_tensor(k)\n", " return tensors\n", "\n", "def get_weight_byte_size(weight):\n", "\n", " if isinstance(weight, torch.Tensor):\n", " weight_byte_size = weight.nelement() * weight.element_size()\n", " else:\n", " weight_byte_size = sum(p.nelement() * p.element_size() for p in weight.parameters())\n", "\n", " return weight_byte_size\n", "\n", "def merge_tensor(tensorA, tensorB):\n", "\n", " t = 0.5\n", "\n", " dot = torch.sum(tensorA * tensorB, dim=1)\n", " norm_v0 = torch.norm(tensorA, dim=1)\n", " norm_v1 = torch.norm(tensorB, dim=1)\n", " cos_omega = dot / (norm_v0 * norm_v1)\n", "\n", " eps = 1e-6\n", " cos_omega = torch.clamp(cos_omega, -1 + eps, 1 - eps)\n", " omega = torch.acos(cos_omega)\n", "\n", " # Slerp\n", " v_t = (torch.sin((1 - t) * omega) /
torch.sin(omega)).unsqueeze(1) * tensorA \\\n", " + (torch.sin(t * omega) / torch.sin(omega)).unsqueeze(1) * tensorB\n", "\n", " return v_t\n", "\n", "# group weight keys by layer number\n", "layers = {}\n", "for key in weight_map.keys():\n", " if key in first_weights:\n", " continue\n", "\n", " # when key matches \"model.layers.[0-9]+.\", add it to that layer's list in layers\n", " layer_str = re.match(r\"model\\.layers\\.[0-9]+\\.\", key)[0]\n", " if layer_str:\n", " layer_no = re.findall(r\"\\d+\",layer_str)\n", " layer_no = layer_no[0]\n", " if layer_no not in layers.keys():\n", " layers[layer_no] = []\n", "\n", " layers[layer_no].append({ \"key\":key, \"file_name\":weight_map[key] })\n", "\n", "# new weight_map index\n", "new_weight_map = {\n", " \"metadata\": {\n", " \"total_size\": 0\n", " },\n", " \"weight_map\": {\n", " }\n", "}\n", "\n", "# load tensors\n", "total_size = 0\n", "tensor_weights = {}\n", "tensors = {}\n", "current_file_name = \"\"\n", "\n", "file_count = 0\n", "file_count_str = str(file_count).zfill(5)\n", "\n", "for key in first_weights:\n", " file_name = weight_map[key]\n", " if current_file_name != file_name:\n", "\n", " # load safetensor\n", " tensors = tensor_load(f\"{target_dir}/{file_name}\", map_location=\"cpu\")\n", " current_file_name = file_name\n", "\n", " tensor_weights[key] = tensors[key]\n", " new_weight_map[\"weight_map\"][key] = f\"model-{file_count_str}.safetensors\"\n", "\n", " # add weight size\n", " total_size += get_weight_byte_size(tensor_weights[key])\n", "\n", "# save tensor\n", "save_file(tensor_weights, f\"{save_dir}/model-{file_count_str}.safetensors\", metadata={\"format\":\"pt\"})\n", "file_count += 1\n", "\n", "layer_keys = sorted([ int(k) for k in layers.keys()])\n", "\n", "for layer_no in layer_keys:\n", " print(\"starting layer:\",layer_no)\n", " file_count_str = str(file_count).zfill(5)\n", " tensor_weights = {}\n", "\n", " stock_expert_weights = {}\n", "\n", " current_file_name = \"\"\n", " for info in layers[str(layer_no)]:\n", " file_name =
info[\"file_name\"]\n", " if current_file_name != file_name:\n", " print(\"Loading Tensors \", file_name)\n", " tensors = tensor_load(f\"{target_dir}/{file_name}\", map_location=\"cpu\")\n", " current_file_name = file_name\n", "\n", " layer_key = info[\"key\"]\n", " layer_weights = tensors[layer_key]\n", "\n", " if 'experts' in layer_key:\n", "\n", " lk = re.findall(r\"block_sparse_moe[.]experts[.][0-9]+.w\", layer_key)[0]\n", " exp_index = int( re.findall(r\"\\d+\",lk)[0] )\n", "\n", " if exp_index % 2 == 0:\n", " paired_index = exp_index + 1\n", " paired_key = layer_key.replace(f'block_sparse_moe.experts.{exp_index}.', f'block_sparse_moe.experts.{paired_index}.')\n", " stock_expert_weights[paired_key] = layer_weights\n", " print(\"stock\",layer_key, layer_weights.shape)\n", " continue\n", "\n", " elif exp_index % 2 == 1:\n", " new_layer_key = re.sub(r\"block_sparse_moe\\.experts\\.\\d+\\.w\", f\"block_sparse_moe.experts.{exp_index//2}.w\", layer_key)\n", "\n", " # merge experts\n", " tensor_weights[new_layer_key] = merge_tensor(stock_expert_weights[layer_key] , layer_weights)\n", "\n", " # add weight size\n", " total_size += get_weight_byte_size(tensor_weights[new_layer_key])\n", "\n", " new_weight_map[\"weight_map\"][new_layer_key] = f\"model-{file_count_str}.safetensors\"\n", " print(\"new experts\", new_layer_key, tensor_weights[new_layer_key].shape, \"from\", layer_key)\n", "\n", " elif 'gate' in layer_key:\n", " print(\"reshape\", layer_weights.shape, \"-> view(4, 2, 4096) -> (4, 4096)\", layer_key)\n", "\n", " # merge each adjacent pair of gate rows with merge_tensor (slerp), not a plain average\n", " weights_reshaped = layer_weights.view(4, 2, 4096)\n", " tensor_weights[layer_key] = merge_tensor(weights_reshaped[:, 0, :], weights_reshaped[:, 1, :])\n", " # tensor_weights[layer_key] = torch.mean(weights_reshaped, dim=1)\n", "\n", " # add weight size\n", " total_size += get_weight_byte_size(tensor_weights[layer_key])\n", "\n", " new_weight_map[\"weight_map\"][layer_key] = f\"model-{file_count_str}.safetensors\"\n", "
print(layer_key, tensor_weights[layer_key].shape)\n", "\n", " else:\n", " tensor_weights[layer_key] = layer_weights\n", "\n", " # add weight size\n", " total_size += get_weight_byte_size(tensor_weights[layer_key])\n", "\n", " new_weight_map[\"weight_map\"][layer_key] = f\"model-{file_count_str}.safetensors\"\n", " print(layer_key, tensor_weights[layer_key].shape)\n", "\n", " # save tensor\n", " save_file(tensor_weights, f\"{save_dir}/model-{file_count_str}.safetensors\", metadata={\"format\":\"pt\"})\n", " print(\"Save Tensors \", f\"{save_dir}/model-{file_count_str}.safetensors\")\n", " file_count += 1\n", "\n", "# save new_weight_map\n", "new_weight_map[\"metadata\"][\"total_size\"] = total_size\n", "with open(f\"{save_dir}/model.safetensors.index.json\", \"w\") as f:\n", " json.dump(new_weight_map, f, indent=2)\n", "\n", "print(\"Done.\")\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Irh7ZYBnRw2g", "outputId": "d4430a3b-975a-4de5-ec88-c55892ea4314" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting git+https://github.com/huggingface/transformers\n", " Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-pjknxkrl\n", " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-pjknxkrl\n", " Resolved https://github.com/huggingface/transformers to commit 29e7a1e1834f331a4916853ecd58549ed78235d6\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (3.13.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (0.19.4)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (2023.6.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (2.31.0)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (0.15.0)\n", "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (0.4.1)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.37.0.dev0) (4.66.1)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.37.0.dev0) (2023.6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.37.0.dev0) (4.5.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.37.0.dev0) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from 
requests->transformers==4.37.0.dev0) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.37.0.dev0) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.37.0.dev0) (2023.11.17)\n", "Building wheels for collected packages: transformers\n", " Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for transformers: filename=transformers-4.37.0.dev0-py3-none-any.whl size=8281393 sha256=fcad3c92f7a9ef1a88b5bafaff20917af338455118d9d562c913dd948d962631\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-4gpnr6os/wheels/c0/14/d6/6c9a5582d2ac191ec0a483be151a4495fe1eb2a6706ca49f1b\n", "Successfully built transformers\n", "Installing collected packages: transformers\n", " Attempting uninstall: transformers\n", " Found existing installation: transformers 4.35.2\n", " Uninstalling transformers-4.35.2:\n", " Successfully uninstalled transformers-4.35.2\n", "Successfully installed transformers-4.37.0.dev0\n", "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.1.0+cu121)\n", "Collecting accelerate\n", " Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m265.7/265.7 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting bitsandbytes\n", " Downloading bitsandbytes-0.41.3.post2-py3-none-any.whl (92.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.6/92.6 MB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting flash_attn\n", " Downloading flash_attn-2.3.6.tar.gz (2.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m90.2 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting sentencepiece\n", " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m79.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (3.20.3)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n", "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.1.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.2)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.19.4)\n", "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.4.1)\n", 
"Collecting einops (from flash_attn)\n", " Downloading einops-0.7.0-py3-none-any.whl (44 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting ninja (from flash_attn)\n", " Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (4.66.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2023.11.17)\n", "Building wheels for collected packages: flash_attn\n", " Building wheel for flash_attn (setup.py) ... 
# --- Cell: install inference dependencies --------------------------------
# Installs transformers straight from the GitHub repository; the recorded
# output shows this replacing the preinstalled 4.35.2 with 4.37.0.dev0.
# NOTE(review): presumably required because the later cell imports
# MixtralForCausalLM, which the preinstalled release may not provide - confirm.
# Neither command pins a version, so a re-run can resolve different builds.
!pip install git+https://github.com/huggingface/transformers --upgrade
!pip install torch accelerate bitsandbytes flash_attn sentencepiece protobuf

# --- Cell: mount Google Drive ---------------------------------------------
# The directories configured in the first cell (temp_dir, save_dir) live under
# /content/drive/MyDrive, so Drive must be mounted before any cell reads or
# writes them.
from google.colab import drive
drive.mount('/content/drive')
Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cuda') before running `.generate()`.\n", " warnings.warn(\n", "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] What was John Holt's vision on education? [/INST] 1. A Lifelong Passion for Learning\n", "\n", "John Holt, the founder of the Clasic.ly platform, had a lifelong passion for learning. He was known for his extensive collection of books, which he referred to as his \"library.\"\n", "\n", "2. A Belief in the Power of the Mind\n", " Holt also believed in the power of the mind. He once said, \"Everything in the world is the print (i.e., the representation or symbol) of something greater.\"\n", "\n", "3. The Value of Self-Education\n", " Holt was a strong advocate for self-education. He believed that a well-educated individual was more valuable to a community than a well-armed one.\n", "\n", "4. Reading is the Key to Success\n", " Holt was a voracious reader. \"Read, read, read,\" he would say to his children.\n", "\n", "5. A Model for Others\n", " Holt's life and work were a model for his children to follow. He was a successful businessman, a devoted husband and father, and a tireless learner.\n", "\n", "6. A Love for the Outdoors\n", " Holt was also a lover of\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] What is the best anime? [/INST] 1. Studio Ghibli - Studio Ghibli is a legendary anime studio that has produced some of the most influential anime in the industry. Shows like \"N Evangelon,\" \"Death of a Salesman,\" and \"Bleeding of a Schoolgirl\" are just a few of the masterpieces that have come from Studio Ghibli. 
Their unique storytelling, innovative animation, and willingness to push the envelope of what is acceptable in anime has made them a must-watch for any self-respecting anime aficionado. 2. Hayo the Slamp Monster - \"Hayo the Slamp Monster\" is a dark fantasy horror-comedy that has gained a cult following for its ability to blend gore, saturical social commentary, and just-the-right-dose-of-sick-black-humor to create a truly unique and unsettling experience. The story of a high school girl who wakes up one day to find herself in the midst of a full-blown zombie apocalypse is a bit of a cliche, but the way in which the show subverts and parody's the trope to make it so is what\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] Who is the cutest in Madoka Magica? [/INST] 1. Tomo: [/img] http:////www.tstug.ed.ac/~t001/ Tomo is the cutest in my opinion. He's the only one who is genuinely kind and caring towards the other characters. He's the only one who is willing to help the other characters and is the only one who is not a jerk to the other characters. 2. Kagami: [/img] http:////www.tstug.ed.ac/~k001/ Kagami is the second cutest for me. He's the only one who is willing to help the other characters and is the only one who is not a jerk to the other characters. 3. Sakura: [/img] http:////www.tstug.ed.ac/~s001/ Sakura is the third cutest for me. She's the only one who is not a jerk to the other characters and is the only one who is willing to help the other characters. 4. Shin: [/img] http:////www.tstug.ed.ac/~s001/ Shin is the fourth\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] Who is the cutest in Bocchi the Rock’er’s? [/INST] 1. 
The cutest in Bocchi the Rock’er’s? \n", "\n", "Well, that’s a tough question. I’m a member of the band, so I have to be a little bit cuter than the other members. But, I’m also the youngest, so I have a lot of energy and ideas. \n", "\n", "2. So, you’re the cutest in the band? \n", "\n", "Aw, you’re making me blush. I’m the cutest in the band? Well, I guess that means I’m the most popular with the fangirls. \n", "\n", "3. That’s not a bad thing to be. I’m the lead singer, after all. \n", "\n", "A soft smile spreads across my face. \n", "\n", "4. I’m the lead singer? \n", "\n", "I’m the one who gets the girls screaming, alright? \n", "\n", "5. ... and the one who gets the most panties off? \n", "\n", "... the one who gets the most panties off? Well, I’m not the one who gets the most panties off. I’m the one who gets the most panties on\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] What is the GUNDAM? [/INST] 그언데는 개발자 들이 사용하는 개발 환境 이라고 하새이다. 개발자가 사용하는 개발 도구 스셐 개발 환境에 따로 사용하는 개발 도구 이라고 하새이다.\n", "\n", "### 그 외에 무엎냅 개발 환 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 ��\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] Speak like Elon Musk. [/INST] 1. Be passionate about your field. 2. Have a first-hand understanding of the market. 3. Read and write constantly. 4. Work hard. 5. Don't be afraid to fail. 6. Don't take it easy. 7. Be open to criticism and feedback. 8. Don't be a jerk.\n", "\n", "### 1. Be passionate about your field.\n", "\n", "I believe that the most important thing is to be passionate about your field. 
If you're not passionate, it's going to be really hard to put in the 40-hour work weeks, make sacrificements, and stay focused.\n", "\n", "\n", "### 2. Have a first-hand understanding of the market.\n", "\n", "It's nearly impossible to have a product or service that is world-class if you don't understand the market. I've been in the habit of reading at least 10-20 pages a day for the past 5 years, and it's been incredibly helpful.\n", "\n", "\n", "\n", "### 3. Read and write constantly.\n", "\n", "The best way to learn is to read and write. It's not a replacement for in-person communication\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] Can you write a short story about a robot learning to paint? [/INST] 500 words\n", "prototype_robot.md\n", "\n", "# Prologue\n", "\n", "Beeep-\n", "\n", "The sound of the alarm pierced the quiet of the lab.\n", "\n", "I swiveled my head, the only part of my body that was still functional, to take a look at the source. A small, green light was blinking on the main unit's status screen.\n", "\n", "- Oh, no. 
That's not good.\n", "\n", "[/inst]\n", "\n", "# Main\n", "\n", "I was in the middle of a newt program when the alert sounded.\n", "\n", "```\n", "public void main() {\n", " // Initialize the web server\n", " HttpServer.createApp()\n", " .shouldConfigureWith(new RelayConfig()\n", " .accept(\"/*.html\")\n", " .thenForwardTo(new IndexController())\n", " .andThenForwardTo(new Error404Controller())\n", " .andThenForwardTo(new Redirect401Controller())\n", " .andThenForwardTo(new Redirect301Controller())\n", " .andThenForwardTo(new Redirect302Controller())\n", " .andThenForwardTo(new Redirect3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] If all roses are flowers, and some flowers fade quickly, can we deduce that some roses fade quickly? [/INST] 1. In programming, if a function has a clear, concise, and descriptive name, and the function does what it says it does, then the function is well-designed. 2. If a function is hard to understand, or it does something other than what it's name suggests, then the function is poorly-designed. 3. In both cases, the answer is \"some roses fade quickly\". 4. It's important to note that this is a subjective matter. A function that is easy to understand for one programmer might be hard to understand for another. 5. It's also important to note that this is about function names, not variable names. Variable names are not as important as function names in this discussion.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] How does a blockchain work, and what are its primary uses? [/INST] 1. 
A blockchain is a decentralized, peer-to-peer network of computers that validate and store transactions in a shared, immutable, and transparent ledger. \n", "- Blocks (groups of transactions) are hashed, stamped with a timestamp, and bundled into a chain. \n", "- Smart contracts are self-executing contracts that are stored on the blockchain and can be triggered by events. \n", "- Cryptocurrency is a digital asset that is stored on the blockchain and can be used to represent value, just like a US Dollar. \n", "- Pro-of-stake is a mechanism to secure the network and provide new blocks, where validators must prove they have the resources to do so by completing a task.\n", "\n", "These are some of the primary uses, but there are many more and the use cases are still being discovered.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] What are the main points of debate concerning the use of artificial intelligence in surveillance? [/INST] 1. Risk of bias: Some critics argue that the use of artificial intelligence in surveillance could lead to the creation of a \"black box\" system that is inherently biased, and that the use of such technology could amplify existing racial and social injustices. 2. Lack of transparency: The use of artificial intelligence in surveillance could lead to a lack of transparency in the way that decisions are made, and that the use of such technology could make it more difficult to hold those responsible for any harm that is caused. 3. Accountability: It is not always clear who is responsible for the harm that is caused by the use of artificial intelligence in surveillance, and that could make it more difficult to hold those accountable. 4. 
Human rights: The use of artificial intelligence in surveillance could lead to a reduction in the number of people employed in the field, and that could have a negative impact on the wider workforce. 5. Cybersecurity: The use of artificial intelligence in surveillance could also create new challenges for cybersecurity, and that could lead to a greater risk of harm. 6. Legal and ethical frameworks: The use of artificial intelligence in surveillance could also lead to a need for new legal and ethical frameworks\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] Discuss the impact of the Renaissance on modern Western society. [/INST] 1. Humanism: The Renaissance led to a re-emphasized value of the individual and the potential of the human spirit, which in turn led to the birth of the scientific, artistic, and economic enlightenment of the West. 2. Science: The Renaissance was a time of intense intellectual curiosity and inquiry. The spirit of exploration and inquiry that characterized the Renaissance led to the birth of modern science, which in turn has had an incalculable impact on the development of the West. 3. Art: The Renaissance was a time of artistic explosion and innovation. The humanistic realism and naturalism of the Renaissance in the arts laid the ground for the development of modernism in the arts. 4. Economy: The Renaissance was a time of economic dynamism and trade. The wealth and prosperity of the West was in no small part a product of the mercantile and industrial spirit of the Renaissance. 5. Legacy: The Renaissance was a time of intense self-reflection and self-criticism. The humanistic self-reflexivity of the Renaissance laid the ground for the development of modern psychology and the social sciences. 6. 
Culmin\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] How might someone feel on their first day of work, and what advice would you give them? [/INST] 1. Congratulations on getting your first day of work! That's a great accomplishment. 2. How someone feels on their first day of work can vary a lot and depend on a few factors, so I'll try to give some general advice that should be helpful to most people. 1. Stay positive and professional: It's important to put your best foot forward on your first day of work. Even if you're feeling a little nervous, try to maintain a positive and professional demeanor. 2. Network: Make an effort to network with your team and introduce yourself to others. People are more likely to remember and like those who are proactive in making an effort to build relationships. 3. Prepare: If you have any questions or concerns about your first day of work, it's helpful to prepare by asking your manager or a trusted colleague in advance. 4. Learn: Take the first day as an opportunity to learn and grow. It's normal to make mistakes, but it's important to recognize them and use them as learning experiences. 5. Reflect: Take the time to reflect on your first day of work and what you learned from it. This will help you to identify\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] What are the recommended treatments for seasonal allergies? [/INST] 1. Antihistorics: These are medications that block the release of histamine, a chemical compound that causes itchy eyes, runny nose, and other symptoms of seasonal allergies. 
Some common antihistorics include l Loratadine, l Loratapert, l Loratril, l Loratide, l Loratril, l Loratril, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] How do human activities contribute to climate change, and what are some strategies to mitigate this? [/INST] 1. Individual transportation: The transportation sector is a significant contributor to greenhouse gases, with passenger vehicles responsible for about 37% of U.S. carbon dioxide emissions in the transportation sector. Some strategies to reduce this include:\n", "\n", " - Increasing fuel efficiency of vehicles: Improving the fuel efficiency of vehicles, such as through hybrid-drive, electric, and hydrogen-powered vehicles, can reduce the amount of energy required to power them.\n", " - Car-sharing: Shared mobility services, such as ride-sharing, car-sharing, and bike-sharing, can reduce the need for individual vehicles.\n", " - Active transportation: Walking, biking, and scooting are low-impact, zero-emission, and fun ways to move around.\n", "\n", "2. Energy use in buildings: Commercial and residential buildings are responsible for a significant amount of energy use, with heating and cooling systems being the largest contributor. 
Some strategies to reduce this include:\n", " - Insulation: Improving insulation, sealing, and ventilation can reduce the amount of energy required to maintain comfortable indoor air temperatures.\n", " - Smart lighting: Smart lighting systems, such as those that use occup\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " [INST] What are the arguments for and against the notion that free will is an illusion? [/INST] 1. The Argument for Free Will: Some philosophers argue that free will is not an illusion, but rather a fundamental part of human nature. They maintain that while our choices may be influenced by various factors, such as our environment, upbring, and experiences, we still have the capacity to make conscious and deliberate decisions. This view holds that while we may have tendencies and biases, we are ultimately responsible for our actions. 2. The Argument Against Free Will: Those who argue that free will is an illusion often point to the pervasiveness of social, economic, and political injustices as evidence of the limitations of human agency. They may argue that the notion of free will is a product of a particular worldview or ideology, and that the extent to which we are able to exercise our will is limited by the structures in which we operate. 3. A Middle Ground: A third position is that while we may not have complete control over our thoughts and actions, we are still to a significant extent responsible for them. This view acknowledges that we are influenced by various factors, but that we still have the capacity to make choices and that the degree of free agency is a matter of interpretation. 4. The Illusion of Free\n", " [INST] How do cultural celebrations differ between Japan and Brazil? [/INST] 1. 
Japan: Japan's most popular cultural celebration is Bon-Bentoshi, or \"Golden Pot Rice,\" which is a celebration of the first day of the new year. This celebration is marked by families and friends gathering to share in a meal of specially prepared rice and other dishes, while making heartfelt resolutions for the year ahead. Bon-Bentoshi is a time for reflection and setting goals for personal and professional development. 2. Brazil: Brazil's most popular cultural celebration is Carnival, a vibrant and colorful celebration of life, love, and excess. Carnival is a celebration of the arts, with samba, samba de rumba, and other forms of music and dance taking center stage. R revelers don't necessarily make resolutions for the year ahead, as the focus is on enjoying the present moment. 3. Differences: The main difference between Japan and Brazil in terms of cultural celebrations is the focus on the present and future. In Japan, the focus is on reflection and setting goals for the year ahead, while in Brazil, the focus is on the present moment and enjoyment. 4. 
# Smoke-test the merged checkpoint: load it in 8-bit and generate a reply for
# a fixed list of prompts, printing each decoded completion.
from transformers import AutoTokenizer, AutoModelForCausalLM, MixtralForCausalLM
import torch

# Load from the directory the merge step wrote to.
model_name_or_path = save_dir

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = MixtralForCausalLM.from_pretrained(model_name_or_path, load_in_8bit=True)
questions = [
    "What was John Holt's vision on education?",
    "What is the best anime?",
    "Who is the cutest in Madoka Magica?",
    "Who is the cutest in Bocchi the Rock’er’s?",
    "What is the GUNDAM?",
    "Speak like Elon Musk.",
    "Can you write a short story about a robot learning to paint?",
    "If all roses are flowers, and some flowers fade quickly, can we deduce that some roses fade quickly?",
    "How does a blockchain work, and what are its primary uses?",
    "What are the main points of debate concerning the use of artificial intelligence in surveillance?",
    "Discuss the impact of the Renaissance on modern Western society.",
    "How might someone feel on their first day of work, and what advice would you give them?",
    "What are the recommended treatments for seasonal allergies?",
    "How do human activities contribute to climate change, and what are some strategies to mitigate this?",
    "What are the arguments for and against the notion that free will is an illusion?",
    "How do cultural celebrations differ between Japan and Brazil?"
]

for q in questions:
    text = f"[INST] {q} [/INST] "
    # Move the encoded prompt to the model's device. Leaving it on the CPU
    # triggered the "input_ids being on a device type different than your
    # model's device" warning visible in the recorded output.
    inputs = tokenizer(" " + text, return_tensors="pt").to(model.device)
    # Passing pad_token_id explicitly selects the same value generate() falls
    # back to, without logging the "Setting `pad_token_id`" notice each call.
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        pad_token_id=tokenizer.eos_token_id,
    )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0cfc52330a8b4e45b2797155f55a8e26": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f7f094da7b7f4a73a2c1b432ef9745eb", "placeholder": "​", "style": "IPY_MODEL_0e2de1f1b4fd4a13a77fca4afeee3b21", "value": " 5.00G/5.00G [03:52<00:00, 24.2MB/s]" } }, "0dd559895a754228ac19f4609d741a24": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, 
"flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0e2de1f1b4fd4a13a77fca4afeee3b21": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0f0a3ee5f28143a7a9a5872243aba76f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_b9d106bfeb504dffbe6dda69fa39cfed", "IPY_MODEL_b3b92f896df44ba3b895abd1178b0d7f", "IPY_MODEL_0cfc52330a8b4e45b2797155f55a8e26" ], "layout": "IPY_MODEL_70bf8a078e7e4c549b5e1bfc0efd6a54" } }, "0f265928ada84c38b2ca168be6e46713": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "105f8e7b2075455b83f2d6dfa9663612": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } 
}, "11bac08b4fb74986bddbe955cdcf08e2": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "11e38f512a09400b9bd8040e0d9b8c5c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": 
null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "12e71b6667e14fd4847beac87f98ea09": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "16f58137e6a943639886f936c612a484": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1cb097b425904e90bcb8c582f0f814f3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_11e38f512a09400b9bd8040e0d9b8c5c", "max": 4949433888, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_1f82e6b28f8840e5bf4ac610e23b873a", "value": 4949433888 } }, "1f82e6b28f8840e5bf4ac610e23b873a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "1fa1624b018f4e2abdcee0c36c43f51a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "25f948d6009c4f25bc35aa9be719c352": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_69f6eab8711b4854a7c09a8bdb37c248", "IPY_MODEL_36975c315853484588c5b0630b243324", "IPY_MODEL_fe1ca55b4b80464abde7a563362c7207" ], "layout": "IPY_MODEL_16f58137e6a943639886f936c612a484" } }, "28ca64b28e3149ab89ca33e2946e1b11": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2ff4e957c3354fd9bae4db5e73720d2b": { "model_module": 
"@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "30380d63efa94a14bb5849d781e39ca6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "319753e8d1ee45818d7967a5c6e0a2b5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, 
"3324f9b9ddef47d1bc6df4fad3c204a3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_458855e95ebd4a54a89ae31ced0aaafc", "placeholder": "​", "style": "IPY_MODEL_836438ed3c11461187b093267bd40334", "value": " 33/33 [09:35<00:00, 17.40s/it]" } }, "337033512b21420ea0432edeb0474833": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "367c8e22c8e443fcafc507635ff35fa8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_03e7ecf37f274aca895fc2ed4ff60098", "placeholder": "​", "style": "IPY_MODEL_9da7131265c64659a36588ea9e57b297", "value": " 4.95G/4.95G [03:48<00:00, 33.3MB/s]" } }, "36975c315853484588c5b0630b243324": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", 
"_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6c6bed8aa9124a7790942629fec64a41", "max": 4949433888, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_aa0bbc6feead49a1a4b3b9f77b73468d", "value": 4949433888 } }, "37ad07e024e0470db87d1f431743ba9c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "38c6fc9eedca4cbea533769e9d12fbda": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", 
"description_width": "" } }, "3bb4efdbd4ab4b6e8cbf6fb7b3180522": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "3dc795ff4a3440b0a0fbaf69b0522dab": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3e2031caf56b45ac92e89b6f76e89bcc": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", 
"_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_47af7599b51d4c26bb5b7b9b67dfb8be", "placeholder": "​", "style": "IPY_MODEL_38c6fc9eedca4cbea533769e9d12fbda", "value": "Upload 10 LFS files: 100%" } }, "3eeb889354ce47c98a98d16660a91c8a": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3ef63707e6c2464d984ce11c6a9ff793": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_ce30eeffc012411f85310ce36e9b92a6", "IPY_MODEL_7d300cffd6d94be590092749ad011e6b", "IPY_MODEL_559bd4eeb9454ca2975971d878b802c0" ], "layout": 
"IPY_MODEL_c10528c58f8748969573a93f3fd5d241" } }, "438635a12ba74c049f5c869fbd3b22d6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "458855e95ebd4a54a89ae31ced0aaafc": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "46a13835971d499b957c6bf4b1b20e94": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "47af7599b51d4c26bb5b7b9b67dfb8be": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, 
"grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "48b8816ad4a447d5a046a89ffeb74625": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4aabc94eb1cd409eb81e68e38a346cac": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", 
"_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4b7d7b20bcb346789230a90d2416dcb9": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_87e989b482114d78b6bd01ade793dd47", "placeholder": "​", "style": "IPY_MODEL_1fa1624b018f4e2abdcee0c36c43f51a", "value": "model-00002-of-00010.safetensors: 100%" } }, "4d573d432c3b4290acf8f19ba38f2cba": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_4b7d7b20bcb346789230a90d2416dcb9", "IPY_MODEL_d7e4123016e341c79f909ffbe44804c8", "IPY_MODEL_6da70ee433954ead8e2376396f8b4a74" ], "layout": "IPY_MODEL_6320884ab10f4748871241e0226d072d" } }, "4de02ec9fd534fa2bd49238be57d6f99": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "4e2156713f22457fa8394409fce6fed0": { "model_module": 
"@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_28ca64b28e3149ab89ca33e2946e1b11", "max": 4915929288, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_337033512b21420ea0432edeb0474833", "value": 4915929288 } }, "4f929ecf23834dafba58e0dc0cb7f757": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "511178d6b3864b08b43b1344c89ca487": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": 
"1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "51da58b29eef4dec8a4a4bb9181ca8b5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "521212eb8acb42d8966c2a10206b53ac": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_3e2031caf56b45ac92e89b6f76e89bcc", "IPY_MODEL_d9841d996902489895c99fddac0b0544", "IPY_MODEL_dcd176ad22c044e7810e1fae8f4dafdd" ], "layout": "IPY_MODEL_e347622fe3a54d05acc7e55a637c0517" } }, 
"559bd4eeb9454ca2975971d878b802c0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_11bac08b4fb74986bddbe955cdcf08e2", "placeholder": "​", "style": "IPY_MODEL_76f5de7f1724458c80a7beaf128f152e", "value": " 4.98G/4.98G [03:47<00:00, 21.3MB/s]" } }, "567f05fb64094c9bab4459a09fa8e4f6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e63101d2bdbe4b8697720937c05e6d08", "placeholder": "​", "style": "IPY_MODEL_695e50a4ca1941bd82fe8919f36991c6", "value": "model-00005-of-00010.safetensors: 100%" } }, "5bdaea5ad5af46d3a6ddbd7a54ad151c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, 
"grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5e1ed962cd7e41cd99bc444ffbf99c1a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "61000c3336ae4fbeb3d616c71c059b67": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "62c9ee5249e845b5bea4a92f2ee497a1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, 
"grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6320884ab10f4748871241e0226d072d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "67a7ba1d0b934778901a6f53a95fe6f4": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": 
null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6907ffb042d2450a86e457407c1f37ec": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9e2da7aeeb5846f7bc4ea4c9c3c10002", "IPY_MODEL_4e2156713f22457fa8394409fce6fed0", "IPY_MODEL_a3f8c4ada50942a0ae128b01e84dad4e" ], "layout": "IPY_MODEL_939314c517c8416b95c18c4e15a67cba" } }, "695e50a4ca1941bd82fe8919f36991c6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "69f6eab8711b4854a7c09a8bdb37c248": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c51f8a89142a4170bc9750f1beb783e9", "placeholder": "​", "style": "IPY_MODEL_a239360f82cd4379a8ccca1782c653af", "value": "model-00009-of-00010.safetensors: 100%" } }, "6b77daaffa6c4d8fb3538eb71621c5ff": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6be374cc1fc04d1baf960e8add1f57ee": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "StyleView", "description_width": "" } }, "6c6bed8aa9124a7790942629fec64a41": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6c7d7a59caef4f4192e6443edfe83e07": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": 
null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6da70ee433954ead8e2376396f8b4a74": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c9d21040aa1a4a7cbd15d87745959e54", "placeholder": "​", "style": "IPY_MODEL_ed39ec52eb9248f4aeb676f42083abfd", "value": " 4.95G/4.95G [03:36<00:00, 30.8MB/s]" } }, "6df0856ea34d41db95102781e7a716ea": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a5365fb5a8a84bc69db87096d1fc6254", "IPY_MODEL_b9b08a264e6d4fe5a61382d62b46bd2b", "IPY_MODEL_b61d3d05d47942378cbfce4241fc025b" ], "layout": "IPY_MODEL_37ad07e024e0470db87d1f431743ba9c" } }, "70bf8a078e7e4c549b5e1bfc0efd6a54": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", 
"align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "716a33208b7448b0821d5d01cb589dcf": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "75af128291e64522a9655a57fc8a2cc3": { "model_module": "@jupyter-widgets/controls", 
"model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_eac730263a144f85a1f257f8cd6cecf9", "IPY_MODEL_1cb097b425904e90bcb8c582f0f814f3", "IPY_MODEL_8951899d9524445c9f19d244c32ce276" ], "layout": "IPY_MODEL_cf2c531a58f143889058bf6e35ae6e31" } }, "76f5de7f1724458c80a7beaf128f152e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "77cc530c3c8f4d19b7805bf0ddb91f41": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_716a33208b7448b0821d5d01cb589dcf", "placeholder": "​", "style": "IPY_MODEL_ffc72487d9b44fb5bb967d807607545f", "value": "Loading checkpoint shards: 100%" } }, "7bcb35f554dd481d958788b859a1667a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": 
"1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7d300cffd6d94be590092749ad011e6b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dd46ff306b814963a1476a5d7bcb623d", "max": 4983021328, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_4aabc94eb1cd409eb81e68e38a346cac", "value": 4983021328 } }, "836438ed3c11461187b093267bd40334": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8470366afe4b4f12a8e2b347b8ec6562": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "87e989b482114d78b6bd01ade793dd47": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8951899d9524445c9f19d244c32ce276": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0f265928ada84c38b2ca168be6e46713", "placeholder": "​", "style": "IPY_MODEL_7bcb35f554dd481d958788b859a1667a", "value": " 4.95G/4.95G [03:56<00:00, 21.7MB/s]" } }, "8a961fe624a04da4b6ee51f5ee6b7218": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6c7d7a59caef4f4192e6443edfe83e07", "placeholder": "​", "style": 
"IPY_MODEL_319753e8d1ee45818d7967a5c6e0a2b5", "value": " 4.94G/4.94G [03:34<00:00, 22.1MB/s]" } }, "8f8426695bcb4e279d7bb3bcfdbd8996": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "90494fbb50b74227a1649ff33065c83f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "930e75338cbd46b08d28f83d250c01f4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "939314c517c8416b95c18c4e15a67cba": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "97556b098ff34e73b5f9055c8ad2d2ac": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_48b8816ad4a447d5a046a89ffeb74625", "placeholder": "​", "style": "IPY_MODEL_51da58b29eef4dec8a4a4bb9181ca8b5", "value": " 4.95G/4.95G [03:45<00:00, 18.2MB/s]" } }, "9971e27b87ba4f668e090bfdff904d3b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67a7ba1d0b934778901a6f53a95fe6f4", "max": 4943175440, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_61000c3336ae4fbeb3d616c71c059b67", "value": 4943175440 } }, "9da7131265c64659a36588ea9e57b297": { "model_module": "@jupyter-widgets/controls", 
"model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9da8146295cb4925884a7d4fb198dc48": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9e2da7aeeb5846f7bc4ea4c9c3c10002": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_12e71b6667e14fd4847beac87f98ea09", "placeholder": "​", "style": "IPY_MODEL_b220bd0f99704fc794ad880e3ddd92a2", "value": "model-00004-of-00010.safetensors: 100%" } }, "a1935d7fcde642419092beeb06e6520f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, 
"grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a239360f82cd4379a8ccca1782c653af": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a283226d5b7e4784b2587036652530c1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a2fdf6aed8564c169edf9225af0d957b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, 
"flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a3af040acc344c6b99d3b8b9af1f4ad2": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a3f8c4ada50942a0ae128b01e84dad4e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f71573d76f344d18859f003971037bd3", "placeholder": "​", "style": "IPY_MODEL_90494fbb50b74227a1649ff33065c83f", "value": " 4.92G/4.92G [03:43<00:00, 21.5MB/s]" } }, "a5365fb5a8a84bc69db87096d1fc6254": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f5707fe93496460ea575e53136efd41d", "placeholder": "​", "style": "IPY_MODEL_6be374cc1fc04d1baf960e8add1f57ee", "value": "model-00010-of-00010.safetensors: 100%" } }, "a5bcb6ab4aef4e6cbd49dc1e4204a45f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4f929ecf23834dafba58e0dc0cb7f757", "max": 4949433824, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e0e7d8cae56743cc8774999b56dbab11", "value": 4949433824 } }, "a6075aa74ca647a996cc89906aba7c8f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_438635a12ba74c049f5c869fbd3b22d6", "max": 4949433888, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_a283226d5b7e4784b2587036652530c1", "value": 4949433888 } }, "a8fd1ac0457548babfe40965687ce31e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "aa0bbc6feead49a1a4b3b9f77b73468d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": 
"StyleView", "bar_color": null, "description_width": "" } }, "b19f6f78371c49b2bd6b41674844c502": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_d73c6975e62f406a8ad70e51dee3b048", "IPY_MODEL_a5bcb6ab4aef4e6cbd49dc1e4204a45f", "IPY_MODEL_367c8e22c8e443fcafc507635ff35fa8" ], "layout": "IPY_MODEL_30380d63efa94a14bb5849d781e39ca6" } }, "b220bd0f99704fc794ad880e3ddd92a2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b3b92f896df44ba3b895abd1178b0d7f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0dd559895a754228ac19f4609d741a24", "max": 4999782496, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_3bb4efdbd4ab4b6e8cbf6fb7b3180522", "value": 4999782496 } }, "b61d3d05d47942378cbfce4241fc025b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { 
"_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a2fdf6aed8564c169edf9225af0d957b", "placeholder": "​", "style": "IPY_MODEL_8470366afe4b4f12a8e2b347b8ec6562", "value": " 3.72G/3.72G [02:54<00:00, 23.5MB/s]" } }, "b65b5386369b4b1ea497489e1f5889f3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b9b08a264e6d4fe5a61382d62b46bd2b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", 
"_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_46a13835971d499b957c6bf4b1b20e94", "max": 3718379112, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d925cb3514a64d52b60de8b7080eccff", "value": 3718379112 } }, "b9d106bfeb504dffbe6dda69fa39cfed": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0cd3116202e344029a73b83614195761", "placeholder": "​", "style": "IPY_MODEL_f2d0be892bb241a2a4eb48ea381ba169", "value": "model-00007-of-00010.safetensors: 100%" } }, "c10528c58f8748969573a93f3fd5d241": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": 
null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c51f8a89142a4170bc9750f1beb783e9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c7aeaad54b894f9996ff419eace56e9a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c9d21040aa1a4a7cbd15d87745959e54": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cd49c0851dfa46c9b5deb13093b48c52": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_77cc530c3c8f4d19b7805bf0ddb91f41", "IPY_MODEL_ea200cf0136c4cadbe172a29a6062ec3", "IPY_MODEL_3324f9b9ddef47d1bc6df4fad3c204a3" ], "layout": "IPY_MODEL_a3af040acc344c6b99d3b8b9af1f4ad2" } }, "ce30eeffc012411f85310ce36e9b92a6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f05b16bf2a42446194384ca893164e0f", 
"placeholder": "​", "style": "IPY_MODEL_5e1ed962cd7e41cd99bc444ffbf99c1a", "value": "model-00008-of-00010.safetensors: 100%" } }, "cf2c531a58f143889058bf6e35ae6e31": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d73c6975e62f406a8ad70e51dee3b048": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3dc795ff4a3440b0a0fbaf69b0522dab", "placeholder": "​", "style": "IPY_MODEL_4de02ec9fd534fa2bd49238be57d6f99", "value": "model-00003-of-00010.safetensors: 100%" } }, "d7e4123016e341c79f909ffbe44804c8": { "model_module": "@jupyter-widgets/controls", 
"model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6b77daaffa6c4d8fb3538eb71621c5ff", "max": 4949433824, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8f8426695bcb4e279d7bb3bcfdbd8996", "value": 4949433824 } }, "d925cb3514a64d52b60de8b7080eccff": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d9841d996902489895c99fddac0b0544": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3eeb889354ce47c98a98d16660a91c8a", "max": 10, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9da8146295cb4925884a7d4fb198dc48", "value": 10 } }, "dc15268697564e779b368906edf5bc2e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", 
"_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "dcd176ad22c044e7810e1fae8f4dafdd": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_105f8e7b2075455b83f2d6dfa9663612", "placeholder": "​", "style": "IPY_MODEL_e665b33ad0604f05a828d752b8ad20ca", "value": " 10/10 [07:43<00:00, 30.83s/it]" } }, "dd46ff306b814963a1476a5d7bcb623d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"e0e7d8cae56743cc8774999b56dbab11": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e347622fe3a54d05acc7e55a637c0517": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e373445e07614e8487913398fc4112d6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", 
"description": "", "description_tooltip": null, "layout": "IPY_MODEL_511178d6b3864b08b43b1344c89ca487", "placeholder": "​", "style": "IPY_MODEL_dc15268697564e779b368906edf5bc2e", "value": "model-00001-of-00010.safetensors: 100%" } }, "e63101d2bdbe4b8697720937c05e6d08": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e665b33ad0604f05a828d752b8ad20ca": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ea200cf0136c4cadbe172a29a6062ec3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a1935d7fcde642419092beeb06e6520f", "max": 33, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_2ff4e957c3354fd9bae4db5e73720d2b", "value": 33 } }, "eac730263a144f85a1f257f8cd6cecf9": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_62c9ee5249e845b5bea4a92f2ee497a1", "placeholder": "​", "style": "IPY_MODEL_930e75338cbd46b08d28f83d250c01f4", "value": "model-00006-of-00010.safetensors: 100%" } }, "ed39ec52eb9248f4aeb676f42083abfd": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f05b16bf2a42446194384ca893164e0f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, 
"align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f2d0be892bb241a2a4eb48ea381ba169": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f5707fe93496460ea575e53136efd41d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, 
"margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f71573d76f344d18859f003971037bd3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f7f094da7b7f4a73a2c1b432ef9745eb": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, 
"grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fc6a3ced382b4c2483912ab1762c1f9c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e373445e07614e8487913398fc4112d6", "IPY_MODEL_9971e27b87ba4f668e090bfdff904d3b", "IPY_MODEL_8a961fe624a04da4b6ee51f5ee6b7218" ], "layout": "IPY_MODEL_a8fd1ac0457548babfe40965687ce31e" } }, "fe1ca55b4b80464abde7a563362c7207": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5bdaea5ad5af46d3a6ddbd7a54ad151c", "placeholder": "​", "style": "IPY_MODEL_c7aeaad54b894f9996ff419eace56e9a", "value": " 4.95G/4.95G [04:05<00:00, 21.1MB/s]" } }, "ff85c527ccf24f3492986ab939dc904b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", 
"model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_567f05fb64094c9bab4459a09fa8e4f6", "IPY_MODEL_a6075aa74ca647a996cc89906aba7c8f", "IPY_MODEL_97556b098ff34e73b5f9055c8ad2d2ac" ], "layout": "IPY_MODEL_b65b5386369b4b1ea497489e1f5889f3" } }, "ffc72487d9b44fb5bb967d807607545f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 0 }