Spaces:

towardsai-tutors
/

ai-tutor-chatbot

Running

App Files Files Community

Omar Solano committed on Aug 5, 2024

Commit

030fa83

1 Parent(s): 7c97045

add long context caching nb

Browse files

Files changed (1) hide show

notebooks/Long_Context_Caching_vs_RAG.ipynb +1278 -0

notebooks/Long_Context_Caching_vs_RAG.ipynb ADDED Viewed

	@@ -0,0 +1,1278 @@

+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebooks/Long_Context_Caching_vs_RAG.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "v9bpz99INAc1"
+      },
+      "source": [
+        "# Install Packages and Setup Variables\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "BeuFJKlj9jKz",
+        "outputId": "6419987a-aa8c-49f8-de20-42aa9d7528c3"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install -q llama-index==0.10.57 llama-index-llms-gemini==0.1.11 openai==1.37.0 google-generativeai==0.7.2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "id": "CWholrWlt2OQ"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import time\n",
+        "from IPython.display import Markdown, display\n",
+        "\n",
+        "# Set the following API Keys in the Python environment. Will be used later.\n",
+        "# We use OpenAI for the embedding model and Gemini-1.5-flash as our LLM.\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"<YOUR_OPENAI_KEY>\"\n",
+        "os.environ[\"GOOGLE_API_KEY\"] = \"<YOUR_API_KEY>\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "f5eV5EnvNCMM"
+      },
+      "source": [
+        "# Load Dataset\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "q-7mRQ-mNJlm"
+      },
+      "source": [
+        "## Download\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3PsdOdMUNmEi"
+      },
+      "source": [
+        "The dataset includes a subset of the documentation from the LlamaIndex library.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "3ImRCP7pACaI",
+        "outputId": "ff52cd9a-67e0-4243-9774-98288c3cf248"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+            "100  570k  100  570k    0     0  3407k      0 --:--:-- --:--:-- --:--:-- 3417k\n"
+          ]
+        }
+      ],
+      "source": [
+        "!curl -o ./llama_index_150k.jsonl https://huggingface.co/datasets/towardsai-buster/llama-index-docs/raw/main/llama_index_data_150k.jsonl"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bZZLK_wyEc-L"
+      },
+      "source": [
+        "## Read File and create LlamaIndex Documents\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "miUqycqAEfr7",
+        "outputId": "6c3068a9-a9a3-465a-8f84-8d329e0cd02a"
+      },
+      "outputs": [],
+      "source": [
+        "from llama_index.core import Document\n",
+        "import json\n",
+        "\n",
+        "\n",
+        "def create_docs(input_file: str) -> list[Document]:\n",
+        "    \"\"\"Build LlamaIndex documents from a JSONL file.\n",
+        "\n",
+        "    Every non-blank line must be a JSON object providing the keys\n",
+        "    ``doc_id``, ``content``, ``url``, ``name``, ``tokens`` and ``source``.\n",
+        "\n",
+        "    Args:\n",
+        "        input_file: Path of the JSONL file to read.\n",
+        "\n",
+        "    Returns:\n",
+        "        One ``Document`` per non-blank line, in file order.\n",
+        "\n",
+        "    Raises:\n",
+        "        json.JSONDecodeError: If a non-blank line is not valid JSON.\n",
+        "        KeyError: If a record lacks one of the expected keys.\n",
+        "    \"\"\"\n",
+        "    documents = []\n",
+        "    # Read as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on\n",
+        "    # Windows) can fail on, or garble, non-ASCII characters in the docs.\n",
+        "    with open(input_file, \"r\", encoding=\"utf-8\") as f:\n",
+        "        for line in f:\n",
+        "            if not line.strip():\n",
+        "                continue  # tolerate blank lines, e.g. a trailing newline\n",
+        "            data = json.loads(line)\n",
+        "\n",
+        "            documents.append(\n",
+        "                Document(\n",
+        "                    doc_id=data[\"doc_id\"],\n",
+        "                    text=data[\"content\"],\n",
+        "                    metadata={  # type: ignore\n",
+        "                        \"url\": data[\"url\"],\n",
+        "                        \"title\": data[\"name\"],\n",
+        "                        \"tokens\": data[\"tokens\"],\n",
+        "                        \"source\": data[\"source\"],\n",
+        "                    },\n",
+        "                    # Keys left out of the text shown to the LLM; only `url` stays.\n",
+        "                    excluded_llm_metadata_keys=[\n",
+        "                        \"title\",\n",
+        "                        \"tokens\",\n",
+        "                        \"source\",\n",
+        "                    ],\n",
+        "                    # Keys left out of the text that is embedded; only `title` stays.\n",
+        "                    excluded_embed_metadata_keys=[\n",
+        "                        \"url\",\n",
+        "                        \"tokens\",\n",
+        "                        \"source\",\n",
+        "                    ],\n",
+        "                )\n",
+        "            )\n",
+        "    return documents"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "f86yksB9K571"
+      },
+      "source": [
+        "# Generate Embedding\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {
+        "id": "iXrr5-tnEfm9"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Number of documents: 56\n"
+          ]
+        }
+      ],
+      "source": [
+        "from llama_index.core import Document\n",
+        "\n",
+        "# Convert the texts to Document objects so the LlamaIndex framework can process them.\n",
+        "documents = create_docs(\"llama_index_150k.jsonl\")\n",
+        "print(\"Number of documents:\", len(documents))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 81,
+          "referenced_widgets": [
+            "6e893cde79734e408bb8d0b4305bedab",
+            "51242f18dfd14aba963ed72b008d6dd6",
+            "a88124e34ad24f19bdcbcd73e998168a",
+            "fff2627bdf20445f8507a7792a17546d",
+            "f5f3f69abfd149f281a2f0c3f58d3284",
+            "d1a558eb15cf43f8a013a91b9262eee5",
+            "946ebbd88b344a248564a1b2c593653e",
+            "4e905c17eddc44c299aabf699ec33642",
+            "ab738a29078d43aaa3364b3076f1eca0",
+            "ae615040ed1a4a47838aaa99192fd33b",
+            "7e3db69b3e20451f8fc88631b7915a39",
+            "27fd17bf0eaa49868321cf2d31a5a0a1",
+            "a0ba4f46f20b435cb6b811317a935b1e",
+            "4026c7a3aead4dc1bb0525535c885601",
+            "8ab7550005bf4d8f80c87716c769e2ec",
+            "3e0e3f06c25543e9877d30ed378edd8d",
+            "4a766f37197b41d7bfa496c0c6d393bf",
+            "a436c3949572481cbde16838298cbf93",
+            "ab59db85ad504297a3c56e3d63f5d474",
+            "2b3e4d550bce4effb83939e026ea6538",
+            "93e9287c92034d36a44a3855f38ef6d8",
+            "12380f5aab5e4c41843036e4f12883cd"
+          ]
+        },
+        "id": "Bsa7Q-DoNWBk",
+        "outputId": "b6f4f826-e4cd-4745-fc99-13b91c2d4d1b"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/Users/omar/Documents/ai_repos/ai-tutor-rag-system/env/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+            "  from .autonotebook import tqdm as notebook_tqdm\n",
+            "Parsing nodes: 100%|██████████| 56/56 [00:00<00:00, 181.20it/s]\n",
+            "Generating embeddings: 100%|██████████| 375/375 [00:05<00:00, 74.36it/s]\n"
+          ]
+        }
+      ],
+      "source": [
+        "from llama_index.core import VectorStoreIndex\n",
+        "from llama_index.core.node_parser import SentenceSplitter\n",
+        "from llama_index.embeddings.openai import OpenAIEmbedding\n",
+        "\n",
+        "\n",
+        "# Build index / generate embeddings using OpenAI embedding model\n",
+        "index = VectorStoreIndex.from_documents(\n",
+        "    documents,\n",
+        "    embed_model=OpenAIEmbedding(model=\"text-embedding-3-small\"),\n",
+        "    transformations=[SentenceSplitter(chunk_size=800, chunk_overlap=400)],\n",
+        "    show_progress=True,\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3DoUxd8KK--Q"
+      },
+      "source": [
+        "# Query Dataset\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "metadata": {
+        "id": "bUaNH97dEfh9"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "I0000 00:00:1722879021.990521 1763413 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Define a query engine that is responsible for retrieving related pieces of text,\n",
+        "# and using an LLM to formulate the final answer.\n",
+        "\n",
+        "from llama_index.llms.gemini import Gemini\n",
+        "\n",
+        "llm = Gemini(model=\"models/gemini-1.5-flash\", temperature=1, max_tokens=1000)\n",
+        "\n",
+        "query_engine = index.as_query_engine(llm=llm, similarity_top_k=10)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "KHK4V_GRR6ZG",
+        "outputId": "105cf2b3-3a65-4eb7-f629-38ce22bb20aa"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "I0000 00:00:1722879022.480648 1763413 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported\n"
+          ]
+        },
+        {
+          "data": {
+            "text/markdown": [
+              "To set up a query engine in code, first create an index from your documents. Then, use the index to create a query engine. You can then query the query engine using the `query` method. \n"
+            ],
+            "text/plain": [
+              "<IPython.core.display.Markdown object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "time taken:  3.4835610389709473\n"
+          ]
+        }
+      ],
+      "source": [
+        "start = time.time()\n",
+        "response = query_engine.query(\"How to setup a query engine in code?\")\n",
+        "end = time.time()\n",
+        "display(Markdown(response.response))\n",
+        "print(\"time taken: \", end - start)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 10,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "S-BmyTBbNd9y",
+        "outputId": "662f49d2-8c19-400a-c7fd-dd0018dcd74e"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/markdown": [
+              "An agent can be set up in code by defining a set of tools and providing them to a `ReActAgent` implementation.\n"
+            ],
+            "text/plain": [
+              "<IPython.core.display.Markdown object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "time taken:  3.3619420528411865\n"
+          ]
+        }
+      ],
+      "source": [
+        "start = time.time()\n",
+        "response = query_engine.query(\"How to setup an agent in code?\")\n",
+        "end = time.time()\n",
+        "display(Markdown(response.response))\n",
+        "print(\"time taken: \", end - start)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Setup Long Context Caching\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "For this section, we will use the Gemini API directly, through the `google-generativeai` Python SDK.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 11,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Import the Python SDK\n",
+        "import google.generativeai as genai\n",
+        "from google.generativeai import caching\n",
+        "from google.generativeai import GenerationConfig\n",
+        "\n",
+        "genai.configure(api_key=os.environ[\"GOOGLE_API_KEY\"])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Convert the JSONL file to a plain-text file that can be uploaded to the Gemini API."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 12,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Contents saved to llama_index_contents.txt\n"
+          ]
+        }
+      ],
+      "source": [
+        "import json\n",
+        "\n",
+        "\n",
+        "def create_text_file(input_file: str, output_file: str) -> None:\n",
+        "    \"\"\"Write the ``content`` field of every JSONL record to a text file.\n",
+        "\n",
+        "    Records are written in file order, each followed by a blank line, and a\n",
+        "    confirmation message is printed once the file has been written.\n",
+        "\n",
+        "    Args:\n",
+        "        input_file: Path of the JSONL file; every non-blank line must be a\n",
+        "            JSON object with a ``content`` key.\n",
+        "        output_file: Path of the text file to create (overwritten if present).\n",
+        "\n",
+        "    Raises:\n",
+        "        json.JSONDecodeError: If a non-blank line is not valid JSON.\n",
+        "        KeyError: If a record has no ``content`` key.\n",
+        "    \"\"\"\n",
+        "    # Use UTF-8 on both ends instead of the platform default encoding, which\n",
+        "    # can fail on non-ASCII characters (e.g. cp1252 on Windows).\n",
+        "    with open(input_file, \"r\", encoding=\"utf-8\") as f, open(\n",
+        "        output_file, \"w\", encoding=\"utf-8\"\n",
+        "    ) as out:\n",
+        "        for line in f:\n",
+        "            if not line.strip():\n",
+        "                continue  # tolerate blank lines, e.g. a trailing newline\n",
+        "            data = json.loads(line)\n",
+        "            out.write(data[\"content\"] + \"\\n\\n\")  # Add two newlines between documents\n",
+        "\n",
+        "    print(f\"Contents saved to {output_file}\")\n",
+        "\n",
+        "\n",
+        "create_text_file(\"llama_index_150k.jsonl\", \"llama_index_contents.txt\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "document = genai.upload_file(path=\"llama_index_contents.txt\")\n",
+        "model_name = \"gemini-1.5-flash-001\"\n",
+        "\n",
+        "cache = genai.caching.CachedContent.create(\n",
+        "    model=model_name,\n",
+        "    system_instruction=\"You answer questions about the LlamaIndex framework.\",\n",
+        "    contents=[document],\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 14,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/markdown": [
+              "Here's a breakdown of how to set up a query engine in LlamaIndex, along with different methods and explanations:\n",
+              "\n",
+              "**1.  The Most Common Approach: Using an Index**\n",
+              "\n",
+              "   The simplest way to get a `QueryEngine` is to leverage an existing `Index` object. Each index type in LlamaIndex has an `as_query_engine()` method that creates a specialized engine for that index:\n",
+              "\n",
+              "   ```python\n",
+              "   from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
+              "\n",
+              "   # Load your data\n",
+              "   documents = SimpleDirectoryReader(\"data\").load_data() \n",
+              "\n",
+              "   # Create a VectorStoreIndex\n",
+              "   index = VectorStoreIndex.from_documents(documents) \n",
+              "\n",
+              "   # Get a query engine\n",
+              "   query_engine = index.as_query_engine() \n",
+              "\n",
+              "   # Now you can use the query engine to ask questions\n",
+              "   response = query_engine.query(\"What is the main point of this document?\")\n",
+              "   print(response)\n",
+              "   ```\n",
+              "\n",
+              "**2.  Customization Through Composition: Advanced Query Engines**\n",
+              "\n",
+              "   For fine-grained control, you can build a `QueryEngine` from its component parts using the `RetrieverQueryEngine`:\n",
+              "\n",
+              "   ```python\n",
+              "   from llama_index.core import VectorStoreIndex, get_response_synthesizer\n",
+              "   from llama_index.core.retrievers import VectorIndexRetriever\n",
+              "   from llama_index.core.query_engine import RetrieverQueryEngine\n",
+              "   from llama_index.core.postprocessor import SimilarityPostprocessor\n",
+              "\n",
+              "   # Build your index (as above)\n",
+              "   index = VectorStoreIndex.from_documents(documents) \n",
+              "\n",
+              "   # Configure the retriever\n",
+              "   retriever = VectorIndexRetriever(\n",
+              "       index=index,\n",
+              "       similarity_top_k=10, \n",
+              "   )\n",
+              "\n",
+              "   # Configure the response synthesizer (the core LLM)\n",
+              "   response_synthesizer = get_response_synthesizer()\n",
+              "\n",
+              "   # Assemble the query engine\n",
+              "   query_engine = RetrieverQueryEngine(\n",
+              "       retriever=retriever,\n",
+              "       response_synthesizer=response_synthesizer,\n",
+              "       node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],\n",
+              "   )\n",
+              "\n",
+              "   # Query the engine\n",
+              "   response = query_engine.query(\"What are the key takeaways from this data?\")\n",
+              "   print(response)\n",
+              "   ```\n",
+              "\n",
+              "**Key Components and Customization:**\n",
+              "\n",
+              "* **Retrieval:**  How your engine finds relevant information from the index (e.g., top-k semantic search, keyword matching, etc.).\n",
+              "* **Postprocessing:**  Additional steps to refine the retrieved results (e.g., reranking, filtering based on metadata, etc.).\n",
+              "* **Response Synthesis:** The LLM used to generate the final response (e.g., OpenAI's GPT-3.5, a local model, etc.).\n",
+              "* **Prompt Engineering:**  Crafting effective prompts to guide your LLM in synthesizing a meaningful answer.\n",
+              "\n",
+              "**Types of Query Engines:**\n",
+              "\n",
+              "* **RetrieverQueryEngine:** Combines a retriever and response synthesizer for standard question answering.\n",
+              "* **SubQuestionQueryEngine:** Decomposes a complex query into sub-queries, especially suited for multi-document analysis and compare/contrast scenarios.\n",
+              "* **RouterQueryEngine:** Routes a query to the most appropriate index or data source, especially helpful when you have a heterogeneous collection of information.\n",
+              "\n",
+              "**Choosing the Right Approach:**\n",
+              "\n",
+              "* For straightforward scenarios, using an index's `as_query_engine()` method is the easiest option.\n",
+              "* When you need finer control over retrieval, postprocessing, or the LLM used, create a `RetrieverQueryEngine` and customize its components.\n",
+              "\n",
+              "Let me know if you'd like to see a specific type of query engine setup or have more advanced use cases in mind! \n"
+            ],
+            "text/plain": [
+              "<IPython.core.display.Markdown object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "time taken:  32.33650302886963\n"
+          ]
+        }
+      ],
+      "source": [
+        "model = genai.GenerativeModel.from_cached_content(cache)\n",
+        "start = time.time()\n",
+        "response = model.generate_content(\n",
+        "    \"How to setup a query engine in code?\",\n",
+        "    generation_config=GenerationConfig(max_output_tokens=1000),\n",
+        ")\n",
+        "end = time.time()\n",
+        "display(Markdown(response.text))\n",
+        "print(\"time taken: \", end - start)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.4"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "12380f5aab5e4c41843036e4f12883cd": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "27fd17bf0eaa49868321cf2d31a5a0a1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_a0ba4f46f20b435cb6b811317a935b1e",
+              "IPY_MODEL_4026c7a3aead4dc1bb0525535c885601",
+              "IPY_MODEL_8ab7550005bf4d8f80c87716c769e2ec"
+            ],
+            "layout": "IPY_MODEL_3e0e3f06c25543e9877d30ed378edd8d"
+          }
+        },
+        "2b3e4d550bce4effb83939e026ea6538": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "3e0e3f06c25543e9877d30ed378edd8d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4026c7a3aead4dc1bb0525535c885601": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ab59db85ad504297a3c56e3d63f5d474",
+            "max": 56,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_2b3e4d550bce4effb83939e026ea6538",
+            "value": 56
+          }
+        },
+        "4a766f37197b41d7bfa496c0c6d393bf": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4e905c17eddc44c299aabf699ec33642": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "51242f18dfd14aba963ed72b008d6dd6": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_d1a558eb15cf43f8a013a91b9262eee5",
+            "placeholder": "",
+            "style": "IPY_MODEL_946ebbd88b344a248564a1b2c593653e",
+            "value": "Parsing nodes: 100%"
+          }
+        },
+        "6e893cde79734e408bb8d0b4305bedab": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_51242f18dfd14aba963ed72b008d6dd6",
+              "IPY_MODEL_a88124e34ad24f19bdcbcd73e998168a",
+              "IPY_MODEL_fff2627bdf20445f8507a7792a17546d"
+            ],
+            "layout": "IPY_MODEL_f5f3f69abfd149f281a2f0c3f58d3284"
+          }
+        },
+        "7e3db69b3e20451f8fc88631b7915a39": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "8ab7550005bf4d8f80c87716c769e2ec": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_93e9287c92034d36a44a3855f38ef6d8",
+            "placeholder": "",
+            "style": "IPY_MODEL_12380f5aab5e4c41843036e4f12883cd",
+            "value": " 56/56 [00:01&lt;00:00, 36.56it/s]"
+          }
+        },
+        "93e9287c92034d36a44a3855f38ef6d8": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "946ebbd88b344a248564a1b2c593653e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "a0ba4f46f20b435cb6b811317a935b1e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_4a766f37197b41d7bfa496c0c6d393bf",
+            "placeholder": "",
+            "style": "IPY_MODEL_a436c3949572481cbde16838298cbf93",
+            "value": "Generating embeddings: 100%"
+          }
+        },
+        "a436c3949572481cbde16838298cbf93": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "a88124e34ad24f19bdcbcd73e998168a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_4e905c17eddc44c299aabf699ec33642",
+            "max": 14,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_ab738a29078d43aaa3364b3076f1eca0",
+            "value": 14
+          }
+        },
+        "ab59db85ad504297a3c56e3d63f5d474": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "ab738a29078d43aaa3364b3076f1eca0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "ae615040ed1a4a47838aaa99192fd33b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d1a558eb15cf43f8a013a91b9262eee5": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f5f3f69abfd149f281a2f0c3f58d3284": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "fff2627bdf20445f8507a7792a17546d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_ae615040ed1a4a47838aaa99192fd33b",
+            "placeholder": "",
+            "style": "IPY_MODEL_7e3db69b3e20451f8fc88631b7915a39",
+            "value": " 14/14 [00:00&lt;00:00, 78.64it/s]"
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}