AlaFalaki commited on
Commit
ec567ac
·
unverified ·
1 Parent(s): 92599f1

Delete notebook/03-RAG_with_LlamaIndex.ipynb

Browse files
Files changed (1) hide show
  1. notebook/03-RAG_with_LlamaIndex.ipynb +0 -265
notebook/03-RAG_with_LlamaIndex.ipynb DELETED
@@ -1,265 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "authorship_tag": "ABX9TyOqdnl91jxcohWthYkUL09p",
8
- "include_colab_link": true
9
- },
10
- "kernelspec": {
11
- "name": "python3",
12
- "display_name": "Python 3"
13
- },
14
- "language_info": {
15
- "name": "python"
16
- }
17
- },
18
- "cells": [
19
- {
20
- "cell_type": "markdown",
21
- "metadata": {
22
- "id": "view-in-github",
23
- "colab_type": "text"
24
- },
25
- "source": [
26
- "<a href=\"https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebook/03-RAG_with_LlamaIndex.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
- ]
28
- },
29
- {
30
- "cell_type": "code",
31
- "execution_count": 1,
32
- "metadata": {
33
- "colab": {
34
- "base_uri": "https://localhost:8080/"
35
- },
36
- "id": "BeuFJKlj9jKz",
37
- "outputId": "4c3a9772-cb7d-4fc1-d0e4-64186861e3e5"
38
- },
39
- "outputs": [
40
- {
41
- "output_type": "stream",
42
- "name": "stdout",
43
- "text": [
44
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
45
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.4/225.4 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
46
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.7/51.7 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
47
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
48
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
49
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m35.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
50
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.0/143.0 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
51
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
52
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
53
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
- "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
55
- "tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
56
- "\u001b[0m"
57
- ]
58
- }
59
- ],
60
- "source": [
61
- "!pip install -q llama-index==0.9.21 openai==1.6.0 cohere==4.39 tiktoken==0.5.2"
62
- ]
63
- },
64
- {
65
- "cell_type": "code",
66
- "source": [
67
- "import os\n",
68
- "\n",
69
- "os.environ[\"OPENAI_API_KEY\"] = \"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\""
70
- ],
71
- "metadata": {
72
- "id": "XuzgSNqcABpV"
73
- },
74
- "execution_count": 4,
75
- "outputs": []
76
- },
77
- {
78
- "cell_type": "code",
79
- "source": [
80
- "!wget https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-dataset.json"
81
- ],
82
- "metadata": {
83
- "colab": {
84
- "base_uri": "https://localhost:8080/"
85
- },
86
- "id": "3ImRCP7pACaI",
87
- "outputId": "9a63bdea-54f7-4923-ccbb-cab03b312774"
88
- },
89
- "execution_count": 5,
90
- "outputs": [
91
- {
92
- "output_type": "stream",
93
- "name": "stdout",
94
- "text": [
95
- "--2023-12-25 17:33:36-- https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-dataset.json\n",
96
- "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\n",
97
- "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
98
- "HTTP request sent, awaiting response... 200 OK\n",
99
- "Length: 25361 (25K) [text/plain]\n",
100
- "Saving to: ‘mini-dataset.json’\n",
101
- "\n",
102
- "mini-dataset.json 100%[===================>] 24.77K --.-KB/s in 0.006s \n",
103
- "\n",
104
- "2023-12-25 17:33:37 (3.76 MB/s) - ‘mini-dataset.json’ saved [25361/25361]\n",
105
- "\n"
106
- ]
107
- }
108
- ]
109
- },
110
- {
111
- "cell_type": "markdown",
112
- "source": [
113
- "### Read JSON"
114
- ],
115
- "metadata": {
116
- "id": "bZZLK_wyEc-L"
117
- }
118
- },
119
- {
120
- "cell_type": "code",
121
- "source": [
122
- "import json\n",
123
- "\n",
124
- "with open('./mini-dataset.json', 'r') as file:\n",
125
- " data = json.load(file)"
126
- ],
127
- "metadata": {
128
- "id": "PBk0zgq6ACXA"
129
- },
130
- "execution_count": 15,
131
- "outputs": []
132
- },
133
- {
134
- "cell_type": "code",
135
- "source": [
136
- "len( data['chunks'] )"
137
- ],
138
- "metadata": {
139
- "colab": {
140
- "base_uri": "https://localhost:8080/"
141
- },
142
- "id": "miUqycqAEfr7",
143
- "outputId": "10005d5f-15c0-4565-a58a-6cb7e466acb4"
144
- },
145
- "execution_count": 16,
146
- "outputs": [
147
- {
148
- "output_type": "execute_result",
149
- "data": {
150
- "text/plain": [
151
- "22"
152
- ]
153
- },
154
- "metadata": {},
155
- "execution_count": 16
156
- }
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "source": [
162
- "texts = [item['text'] for item in data['chunks']]"
163
- ],
164
- "metadata": {
165
- "id": "Mq5WKj0QEfpk"
166
- },
167
- "execution_count": 18,
168
- "outputs": []
169
- },
170
- {
171
- "cell_type": "markdown",
172
- "source": [
173
- "### Apply Embedding"
174
- ],
175
- "metadata": {
176
- "id": "f86yksB9K571"
177
- }
178
- },
179
- {
180
- "cell_type": "code",
181
- "source": [
182
- "from llama_index import Document\n",
183
- "\n",
184
- "documents = [Document(text=t) for t in texts]"
185
- ],
186
- "metadata": {
187
- "id": "iXrr5-tnEfm9"
188
- },
189
- "execution_count": 24,
190
- "outputs": []
191
- },
192
- {
193
- "cell_type": "code",
194
- "source": [
195
- "from llama_index import VectorStoreIndex\n",
196
- "\n",
197
- "# build index / generate embeddings using OpenAI\n",
198
- "index = VectorStoreIndex.from_documents(documents)"
199
- ],
200
- "metadata": {
201
- "id": "qQit27lBEfkV"
202
- },
203
- "execution_count": 25,
204
- "outputs": []
205
- },
206
- {
207
- "cell_type": "code",
208
- "source": [
209
- "# Save the generated embeddings.\n",
210
- "# index.storage_context.persist(persist_dir=\"indexes\")"
211
- ],
212
- "metadata": {
213
- "id": "xxB0A9ZYM-OD"
214
- },
215
- "execution_count": 29,
216
- "outputs": []
217
- },
218
- {
219
- "cell_type": "markdown",
220
- "source": [
221
- "### Query Dataset"
222
- ],
223
- "metadata": {
224
- "id": "3DoUxd8KK--Q"
225
- }
226
- },
227
- {
228
- "cell_type": "code",
229
- "source": [
230
- "query_engine = index.as_query_engine()"
231
- ],
232
- "metadata": {
233
- "id": "bUaNH97dEfh9"
234
- },
235
- "execution_count": 27,
236
- "outputs": []
237
- },
238
- {
239
- "cell_type": "code",
240
- "source": [
241
- "response = query_engine.query(\n",
242
- " \"How many parameters LLaMA2 model has?\"\n",
243
- ")\n",
244
- "print(response)"
245
- ],
246
- "metadata": {
247
- "colab": {
248
- "base_uri": "https://localhost:8080/"
249
- },
250
- "id": "tEgFx_aeFS5e",
251
- "outputId": "9133bd0c-f0c5-4124-9c4b-ab6c4c32b07a"
252
- },
253
- "execution_count": 28,
254
- "outputs": [
255
- {
256
- "output_type": "stream",
257
- "name": "stdout",
258
- "text": [
259
- "The Llama 2 model has four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters.\n"
260
- ]
261
- }
262
- ]
263
- }
264
- ]
265
- }