AlaFalaki commited on
Commit
ec567ac
·
unverified ·
1 Parent(s): 92599f1

Delete notebook/03-RAG_with_LlamaIndex.ipynb

Browse files
Files changed (1) hide show
  1. notebook/03-RAG_with_LlamaIndex.ipynb +0 -265
notebook/03-RAG_with_LlamaIndex.ipynb DELETED
@@ -1,265 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "authorship_tag": "ABX9TyOqdnl91jxcohWthYkUL09p",
8
- "include_colab_link": true
9
- },
10
- "kernelspec": {
11
- "name": "python3",
12
- "display_name": "Python 3"
13
- },
14
- "language_info": {
15
- "name": "python"
16
- }
17
- },
18
- "cells": [
19
- {
20
- "cell_type": "markdown",
21
- "metadata": {
22
- "id": "view-in-github",
23
- "colab_type": "text"
24
- },
25
- "source": [
26
- "<a href=\"https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebook/03-RAG_with_LlamaIndex.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
- ]
28
- },
29
- {
30
- "cell_type": "code",
31
- "execution_count": 1,
32
- "metadata": {
33
- "colab": {
34
- "base_uri": "https://localhost:8080/"
35
- },
36
- "id": "BeuFJKlj9jKz",
37
- "outputId": "4c3a9772-cb7d-4fc1-d0e4-64186861e3e5"
38
- },
39
- "outputs": [
40
- {
41
- "output_type": "stream",
42
- "name": "stdout",
43
- "text": [
44
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
45
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.4/225.4 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
46
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.7/51.7 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
47
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
48
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
49
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m35.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
50
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.0/143.0 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
51
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
52
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
53
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
55
- "tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
56
- "\u001b[0m"
57
- ]
58
- }
59
- ],
60
- "source": [
61
- "!pip install -q llama-index==0.9.21 openai==1.6.0 cohere==4.39 tiktoken==0.5.2"
62
- ]
63
- },
64
- {
65
- "cell_type": "code",
66
- "source": [
67
- "import os\n",
68
- "\n",
69
- "os.environ[\"OPENAI_API_KEY\"] = \"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\""
70
- ],
71
- "metadata": {
72
- "id": "XuzgSNqcABpV"
73
- },
74
- "execution_count": 4,
75
- "outputs": []
76
- },
77
- {
78
- "cell_type": "code",
79
- "source": [
80
- "!wget https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-dataset.json"
81
- ],
82
- "metadata": {
83
- "colab": {
84
- "base_uri": "https://localhost:8080/"
85
- },
86
- "id": "3ImRCP7pACaI",
87
- "outputId": "9a63bdea-54f7-4923-ccbb-cab03b312774"
88
- },
89
- "execution_count": 5,
90
- "outputs": [
91
- {
92
- "output_type": "stream",
93
- "name": "stdout",
94
- "text": [
95
- "--2023-12-25 17:33:36-- https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-dataset.json\n",
96
- "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\n",
97
- "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
98
- "HTTP request sent, awaiting response... 200 OK\n",
99
- "Length: 25361 (25K) [text/plain]\n",
100
- "Saving to: ‘mini-dataset.json’\n",
101
- "\n",
102
- "mini-dataset.json 100%[===================>] 24.77K --.-KB/s in 0.006s \n",
103
- "\n",
104
- "2023-12-25 17:33:37 (3.76 MB/s) - ‘mini-dataset.json’ saved [25361/25361]\n",
105
- "\n"
106
- ]
107
- }
108
- ]
109
- },
110
- {
111
- "cell_type": "markdown",
112
- "source": [
113
- "### Read JSON"
114
- ],
115
- "metadata": {
116
- "id": "bZZLK_wyEc-L"
117
- }
118
- },
119
- {
120
- "cell_type": "code",
121
- "source": [
122
- "import json\n",
123
- "\n",
124
- "with open('./mini-dataset.json', 'r') as file:\n",
125
- " data = json.load(file)"
126
- ],
127
- "metadata": {
128
- "id": "PBk0zgq6ACXA"
129
- },
130
- "execution_count": 15,
131
- "outputs": []
132
- },
133
- {
134
- "cell_type": "code",
135
- "source": [
136
- "len( data['chunks'] )"
137
- ],
138
- "metadata": {
139
- "colab": {
140
- "base_uri": "https://localhost:8080/"
141
- },
142
- "id": "miUqycqAEfr7",
143
- "outputId": "10005d5f-15c0-4565-a58a-6cb7e466acb4"
144
- },
145
- "execution_count": 16,
146
- "outputs": [
147
- {
148
- "output_type": "execute_result",
149
- "data": {
150
- "text/plain": [
151
- "22"
152
- ]
153
- },
154
- "metadata": {},
155
- "execution_count": 16
156
- }
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "source": [
162
- "texts = [item['text'] for item in data['chunks']]"
163
- ],
164
- "metadata": {
165
- "id": "Mq5WKj0QEfpk"
166
- },
167
- "execution_count": 18,
168
- "outputs": []
169
- },
170
- {
171
- "cell_type": "markdown",
172
- "source": [
173
- "### Apply Embedding"
174
- ],
175
- "metadata": {
176
- "id": "f86yksB9K571"
177
- }
178
- },
179
- {
180
- "cell_type": "code",
181
- "source": [
182
- "from llama_index import Document\n",
183
- "\n",
184
- "documents = [Document(text=t) for t in texts]"
185
- ],
186
- "metadata": {
187
- "id": "iXrr5-tnEfm9"
188
- },
189
- "execution_count": 24,
190
- "outputs": []
191
- },
192
- {
193
- "cell_type": "code",
194
- "source": [
195
- "from llama_index import VectorStoreIndex\n",
196
- "\n",
197
- "# build index / generate embeddings using OpenAI\n",
198
- "index = VectorStoreIndex.from_documents(documents)"
199
- ],
200
- "metadata": {
201
- "id": "qQit27lBEfkV"
202
- },
203
- "execution_count": 25,
204
- "outputs": []
205
- },
206
- {
207
- "cell_type": "code",
208
- "source": [
209
- "# Save the generated embeddings.\n",
210
- "# index.storage_context.persist(persist_dir=\"indexes\")"
211
- ],
212
- "metadata": {
213
- "id": "xxB0A9ZYM-OD"
214
- },
215
- "execution_count": 29,
216
- "outputs": []
217
- },
218
- {
219
- "cell_type": "markdown",
220
- "source": [
221
- "### Query Dataset"
222
- ],
223
- "metadata": {
224
- "id": "3DoUxd8KK--Q"
225
- }
226
- },
227
- {
228
- "cell_type": "code",
229
- "source": [
230
- "query_engine = index.as_query_engine()"
231
- ],
232
- "metadata": {
233
- "id": "bUaNH97dEfh9"
234
- },
235
- "execution_count": 27,
236
- "outputs": []
237
- },
238
- {
239
- "cell_type": "code",
240
- "source": [
241
- "response = query_engine.query(\n",
242
- " \"How many parameters LLaMA2 model has?\"\n",
243
- ")\n",
244
- "print(response)"
245
- ],
246
- "metadata": {
247
- "colab": {
248
- "base_uri": "https://localhost:8080/"
249
- },
250
- "id": "tEgFx_aeFS5e",
251
- "outputId": "9133bd0c-f0c5-4124-9c4b-ab6c4c32b07a"
252
- },
253
- "execution_count": 28,
254
- "outputs": [
255
- {
256
- "output_type": "stream",
257
- "name": "stdout",
258
- "text": [
259
- "The Llama 2 model has four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters.\n"
260
- ]
261
- }
262
- ]
263
- }
264
- ]
265
- }