JAIGANESAN N committed on
Commit
2327a14
·
1 Parent(s): a820ac5

upgrade model from GPT-4o-mini to Gemini-1.5-flash

Browse files
notebooks/04_RAG_with_VectorStore.ipynb ADDED
@@ -0,0 +1,1275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "view-in-github",
7
+ "colab_type": "text"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebooks/04_RAG_with_VectorStore.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "5BGJ3fxhOk2V"
17
+ },
18
+ "source": [
19
+ "# Install Packages and Setup Variables\n"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "metadata": {
26
+ "id": "QPJzr-I9XQ7l",
27
+ "collapsed": true,
28
+ "outputId": "dad24c44-2f42-4c37-a597-232ccffb9861",
29
+ "colab": {
30
+ "base_uri": "https://localhost:8080/"
31
+ }
32
+ },
33
+ "outputs": [
34
+ {
35
+ "output_type": "stream",
36
+ "name": "stdout",
37
+ "text": [
38
+ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/67.3 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
39
+ "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
40
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
41
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
42
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
43
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m150.7/150.7 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
44
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m867.6/867.6 kB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
45
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m337.0/337.0 kB\u001b[0m \u001b[31m28.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
46
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m679.1/679.1 kB\u001b[0m \u001b[31m42.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
47
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.5/15.5 MB\u001b[0m \u001b[31m65.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
48
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m559.5/559.5 kB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
49
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m74.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
50
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m273.8/273.8 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
51
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.0/94.0 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
52
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
53
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m71.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
55
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m63.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
56
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.9/302.9 kB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
57
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
58
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m56.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
59
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
60
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.2/13.2 MB\u001b[0m \u001b[31m91.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
61
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.0/64.0 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
62
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.5/52.5 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
63
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.7/149.7 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
64
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
65
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
66
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m90.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
67
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
68
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m53.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
69
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
70
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
71
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m26.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
72
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m187.4/187.4 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
73
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
74
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
75
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.8/295.8 kB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
76
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.4/71.4 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
77
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m83.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
78
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m425.7/425.7 kB\u001b[0m \u001b[31m30.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
79
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.3/157.3 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
80
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
81
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
82
+ "\u001b[?25h Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
83
+ ]
84
+ }
85
+ ],
86
+ "source": [
87
+ "!pip install -q llama-index==0.10.57 llama-index-vector-stores-chroma llama-index-llms-gemini==0.1.11 langchain_google_genai google-generativeai==0.5.4 langchain==0.1.17 langchain-chroma langchain_openai==0.1.5 openai==1.37.0 chromadb"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": null,
93
+ "metadata": {
94
+ "id": "riuXwpSPcvWC"
95
+ },
96
+ "outputs": [],
97
+ "source": [
98
+ "import os\n",
99
+ "# Set the following API Keys in the Python environment. Will be used later.\n",
100
+ "os.environ[\"OPENAI_API_KEY\"] = \"<YOUR_API_KEY>\"\n",
101
+ "os.environ[\"GOOGLE_API_KEY\"] = \"<YOUR_API_KEY>\""
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "markdown",
106
+ "metadata": {
107
+ "id": "I9JbAzFcjkpn"
108
+ },
109
+ "source": [
110
+ "# Load the Dataset (CSV)\n"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "markdown",
115
+ "metadata": {
116
+ "id": "_Tif8-JoRH68"
117
+ },
118
+ "source": [
119
+ "## Download\n"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "markdown",
124
+ "metadata": {
125
+ "id": "4fQaa1LN1mXL"
126
+ },
127
+ "source": [
128
+ "The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. Read the dataset as a long string.\n"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": null,
134
+ "metadata": {
135
+ "colab": {
136
+ "base_uri": "https://localhost:8080/"
137
+ },
138
+ "id": "-QTUkdfJjY4N",
139
+ "outputId": "b43abf38-f483-41b6-eb39-c21aa7eca276"
140
+ },
141
+ "outputs": [
142
+ {
143
+ "output_type": "stream",
144
+ "name": "stdout",
145
+ "text": [
146
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
147
+ " Dload Upload Total Spent Left Speed\n",
148
+ "100 169k 100 169k 0 0 559k 0 --:--:-- --:--:-- --:--:-- 557k\n"
149
+ ]
150
+ }
151
+ ],
152
+ "source": [
153
+ "!curl -o ./mini-dataset.csv https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "markdown",
158
+ "metadata": {
159
+ "id": "zk-4alIxROo8"
160
+ },
161
+ "source": [
162
+ "## Read File\n"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "metadata": {
169
+ "colab": {
170
+ "base_uri": "https://localhost:8080/"
171
+ },
172
+ "id": "7CYwRT6R0o0I",
173
+ "outputId": "bdcf783d-c75b-4650-dafe-70f99ddd7e76"
174
+ },
175
+ "outputs": [
176
+ {
177
+ "output_type": "stream",
178
+ "name": "stdout",
179
+ "text": [
180
+ "171044\n"
181
+ ]
182
+ }
183
+ ],
184
+ "source": [
185
+ "import csv\n",
186
+ "\n",
187
+ "text = \"\"\n",
188
+ "\n",
189
+ "# Load the file as a CSV\n",
190
+ "with open(\"./mini-dataset.csv\", mode=\"r\", encoding=\"utf-8\") as file:\n",
191
+ " csv_reader = csv.reader(file)\n",
192
+ "\n",
193
+ " for idx, row in enumerate(csv_reader):\n",
194
+ " if idx == 0:\n",
195
+ " continue\n",
196
+ " text += row[1]\n",
197
+ "\n",
198
+ "# The number of characters in the dataset.\n",
199
+ "print(len(text))"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "markdown",
204
+ "metadata": {
205
+ "id": "S17g2RYOjmf2"
206
+ },
207
+ "source": [
208
+ "# Chunking\n"
209
+ ]
210
+ },
211
+ {
212
+ "cell_type": "code",
213
+ "execution_count": null,
214
+ "metadata": {
215
+ "colab": {
216
+ "base_uri": "https://localhost:8080/"
217
+ },
218
+ "id": "STACTMUR1z9N",
219
+ "outputId": "bc0c4808-f709-4eee-bb07-0993a2cb8f73"
220
+ },
221
+ "outputs": [
222
+ {
223
+ "output_type": "stream",
224
+ "name": "stdout",
225
+ "text": [
226
+ "335\n"
227
+ ]
228
+ }
229
+ ],
230
+ "source": [
231
+ "chunk_size = 512\n",
232
+ "chunks = []\n",
233
+ "\n",
234
+ "# Split the long text into smaller manageable chunks of 512 characters.\n",
235
+ "for i in range(0, len(text), chunk_size):\n",
236
+ " chunks.append(text[i : i + chunk_size])\n",
237
+ "\n",
238
+ "print(len(chunks))"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "markdown",
243
+ "metadata": {
244
+ "id": "9fOomeMGqu10"
245
+ },
246
+ "source": [
247
+ "# Interface of Chroma with LlamaIndex\n"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": null,
253
+ "metadata": {
254
+ "id": "CtdsIUQ81_hT"
255
+ },
256
+ "outputs": [],
257
+ "source": [
258
+ "from llama_index.core import Document\n",
259
+ "\n",
260
+ "# Convert the chunks to Document objects so the LlamaIndex framework can process them.\n",
261
+ "documents = [Document(text=t) for t in chunks]"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "markdown",
266
+ "metadata": {
267
+ "id": "OWaT6rL7ksp8"
268
+ },
269
+ "source": [
270
+ "Save on Chroma\n"
271
+ ]
272
+ },
273
+ {
274
+ "cell_type": "code",
275
+ "execution_count": null,
276
+ "metadata": {
277
+ "id": "mXi56KTXk2sp"
278
+ },
279
+ "outputs": [],
280
+ "source": [
281
+ "import chromadb\n",
282
+ "\n",
283
+ "# create client and a new collection\n",
284
+ "# chromadb.PersistentClient saves data on disk at the given path (chromadb.EphemeralClient keeps data in-memory only).\n",
285
+ "chroma_client = chromadb.PersistentClient(path=\"./mini-chunked-dataset\")\n",
286
+ "chroma_collection = chroma_client.create_collection(\"mini-chunked-dataset\")"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": null,
292
+ "metadata": {
293
+ "id": "jKXURvLtkuTS"
294
+ },
295
+ "outputs": [],
296
+ "source": [
297
+ "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
298
+ "from llama_index.core import StorageContext\n",
299
+ "\n",
300
+ "# Define a storage context object using the created vector database.\n",
301
+ "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
302
+ "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "execution_count": null,
308
+ "metadata": {
309
+ "id": "WsD52wtrlESi",
310
+ "outputId": "975aeeeb-df70-4946-d306-54aab19d3f09",
311
+ "colab": {
312
+ "base_uri": "https://localhost:8080/",
313
+ "height": 81,
314
+ "referenced_widgets": [
315
+ "be409ad8ca7e42b2aac87e193d55d116",
316
+ "2985c54fc3834d8599323f52075a01a6",
317
+ "a96efe0fc89e42748f1c37fdc000056b",
318
+ "29bbfc318ffd4a8e9452960f0f2ccb8d",
319
+ "393c4f0d140c4259add663bf43767cbb",
320
+ "77a5354e5209441bb6a69b71f96a2102",
321
+ "4d682a386d1146cf828470083fba1fe6",
322
+ "cf7bcdd679b9462285c619966a49f6d1",
323
+ "2f0ec2b1e52d441ca835deb88cb9349f",
324
+ "555508eb2f8c4caf81b623a8c157e742",
325
+ "8035840c130f4804b9da0958d23713bc",
326
+ "6d93958f663f48b4922a9524efb70e91",
327
+ "4b42d724b989497faee4836a1e2dda70",
328
+ "cbe2e4a95f2e412f83fed16bc5db08ad",
329
+ "1f3d664867634613a30281f61ab33ac7",
330
+ "aab9626f226c4c83908c3b042d6e4bdb",
331
+ "eecd42e03f4d484c87032c25df7570b3",
332
+ "1956d41b8b9540c99fb9b4a4df7bbaa2",
333
+ "d800ddbadddd48ecbbaf0dd39035d275",
334
+ "9d58bc10ef844753a17505aca55e079a",
335
+ "91ca1a302884473f8314c097c41d03fd",
336
+ "b71c84b4ca3443d29d650bb8ea0f5458"
337
+ ]
338
+ }
339
+ },
340
+ "outputs": [
341
+ {
342
+ "output_type": "display_data",
343
+ "data": {
344
+ "text/plain": [
345
+ "Parsing nodes: 0%| | 0/335 [00:00<?, ?it/s]"
346
+ ],
347
+ "application/vnd.jupyter.widget-view+json": {
348
+ "version_major": 2,
349
+ "version_minor": 0,
350
+ "model_id": "be409ad8ca7e42b2aac87e193d55d116"
351
+ }
352
+ },
353
+ "metadata": {}
354
+ },
355
+ {
356
+ "output_type": "display_data",
357
+ "data": {
358
+ "text/plain": [
359
+ "Generating embeddings: 0%| | 0/335 [00:00<?, ?it/s]"
360
+ ],
361
+ "application/vnd.jupyter.widget-view+json": {
362
+ "version_major": 2,
363
+ "version_minor": 0,
364
+ "model_id": "6d93958f663f48b4922a9524efb70e91"
365
+ }
366
+ },
367
+ "metadata": {}
368
+ }
369
+ ],
370
+ "source": [
371
+ "from llama_index.core import VectorStoreIndex\n",
372
+ "from llama_index.core.node_parser import SentenceSplitter\n",
373
+ "from llama_index.embeddings.openai import OpenAIEmbedding\n",
374
+ "\n",
375
+ "# Build index / generate embeddings using OpenAI embedding model\n",
376
+ "index = VectorStoreIndex.from_documents(\n",
377
+ " documents,\n",
378
+ " embed_model=OpenAIEmbedding(model=\"text-embedding-3-small\"),\n",
379
+ " storage_context=storage_context,\n",
380
+ " show_progress=True,\n",
381
+ ")"
382
+ ]
383
+ },
384
+ {
385
+ "cell_type": "markdown",
386
+ "metadata": {
387
+ "id": "8JPD8yAinVSq"
388
+ },
389
+ "source": [
390
+ "Query Dataset\n"
391
+ ]
392
+ },
393
+ {
394
+ "cell_type": "code",
395
+ "execution_count": null,
396
+ "metadata": {
397
+ "id": "mzS13x1ZlZ5X"
398
+ },
399
+ "outputs": [],
400
+ "source": [
401
+ "# Define a query engine that is responsible for retrieving related pieces of text,\n",
402
+ "# and using a LLM to formulate the final answer.\n",
403
+ "\n",
404
+ "from llama_index.llms.gemini import Gemini\n",
405
+ "\n",
406
+ "llm = Gemini(model=\"models/gemini-1.5-flash\", temperature=1, max_tokens=512)\n",
407
+ "\n",
408
+ "query_engine = index.as_query_engine(llm=llm, similarity_top_k=5)"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": null,
414
+ "metadata": {
415
+ "colab": {
416
+ "base_uri": "https://localhost:8080/",
417
+ "height": 52
418
+ },
419
+ "id": "AYsQ4uLN_Oxg",
420
+ "outputId": "1acd38f6-d083-4d4a-aff2-a0063561adc1"
421
+ },
422
+ "outputs": [
423
+ {
424
+ "output_type": "stream",
425
+ "name": "stdout",
426
+ "text": [
427
+ "The LLaMA 2 model has four different sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. \n",
428
+ "\n"
429
+ ]
430
+ }
431
+ ],
432
+ "source": [
433
+ "response = query_engine.query(\"How many parameters LLaMA2 model has?\")\n",
434
+ "print(response)"
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "markdown",
439
+ "metadata": {
440
+ "id": "kWK571VNg-qR"
441
+ },
442
+ "source": [
443
+ "# Interface of Chroma with LangChain\n"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": null,
449
+ "metadata": {
450
+ "id": "SMPAniL2e4NP"
451
+ },
452
+ "outputs": [],
453
+ "source": [
454
+ "from langchain.schema.document import Document\n",
455
+ "\n",
456
+ "# Convert the chunks to Document objects so the LangChain framework can process them.\n",
457
+ "documents = [Document(page_content=t) for t in chunks]"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "markdown",
462
+ "metadata": {
463
+ "id": "QBt8qGxArUPD"
464
+ },
465
+ "source": [
466
+ "Save on Chroma\n"
467
+ ]
468
+ },
469
+ {
470
+ "cell_type": "code",
471
+ "execution_count": null,
472
+ "metadata": {
473
+ "id": "2xas7HkuhJ8A"
474
+ },
475
+ "outputs": [],
476
+ "source": [
477
+ "from langchain_chroma import Chroma\n",
478
+ "from langchain_openai import OpenAIEmbeddings\n",
479
+ "\n",
480
+ "# Add the documents to chroma DB and create Index / embeddings\n",
481
+ "\n",
482
+ "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n",
483
+ "chroma_db = Chroma.from_documents(\n",
484
+ " documents=documents,\n",
485
+ " embedding=embeddings,\n",
486
+ " persist_directory=\"./mini-chunked-dataset\",\n",
487
+ " collection_name=\"mini-chunked-dataset\",\n",
488
+ ")"
489
+ ]
490
+ },
491
+ {
492
+ "cell_type": "markdown",
493
+ "metadata": {
494
+ "id": "P8AXJJyBrZWF"
495
+ },
496
+ "source": [
497
+ "Query Dataset\n"
498
+ ]
499
+ },
500
+ {
501
+ "cell_type": "code",
502
+ "execution_count": null,
503
+ "metadata": {
504
+ "id": "-H64YLxshM2b"
505
+ },
506
+ "outputs": [],
507
+ "source": [
508
+ "from langchain_google_genai import ChatGoogleGenerativeAI\n",
509
+ "\n",
510
+ "# Initializing the LLM model\n",
511
+ "#llm = ChatOpenAI(temperature=0, model=\"gpt-4o-mini\", max_tokens=512)\n",
512
+ "\n",
513
+ "llm = ChatGoogleGenerativeAI(\n",
514
+ " model=\"gemini-1.5-flash\",\n",
515
+ " temperature=0,\n",
516
+ " max_tokens=512,\n",
517
+ ")"
518
+ ]
519
+ },
520
+ {
521
+ "cell_type": "code",
522
+ "execution_count": null,
523
+ "metadata": {
524
+ "colab": {
525
+ "base_uri": "https://localhost:8080/"
526
+ },
527
+ "id": "AxBqPNtthPaa",
528
+ "outputId": "138a1238-97b8-41e1-9fd7-d655997d0743"
529
+ },
530
+ "outputs": [
531
+ {
532
+ "output_type": "stream",
533
+ "name": "stdout",
534
+ "text": [
535
+ "I'm sorry, but the provided context doesn't mention the number of parameters for the LLaMA2 model. \n",
536
+ "\n"
537
+ ]
538
+ }
539
+ ],
540
+ "source": [
541
+ "from langchain.chains import RetrievalQA\n",
542
+ "\n",
543
+ "query = \"How many parameters LLaMA2 model has?\"\n",
544
+ "retriever = chroma_db.as_retriever(search_kwargs={\"k\": 2})\n",
545
+ "# Define a RetrievalQA chain that is responsible for retrieving related pieces of text,\n",
546
+ "# and using a LLM to formulate the final answer.\n",
547
+ "chain = RetrievalQA.from_chain_type(llm=llm, chain_type=\"stuff\", retriever=retriever)\n",
548
+ "\n",
549
+ "response = chain.invoke(query)\n",
550
+ "print(response[\"result\"])"
551
+ ]
552
+ },
553
+ {
554
+ "cell_type": "code",
555
+ "source": [],
556
+ "metadata": {
557
+ "id": "AKr16L_kwyYX"
558
+ },
559
+ "execution_count": null,
560
+ "outputs": []
561
+ }
562
+ ],
563
+ "metadata": {
564
+ "colab": {
565
+ "provenance": [],
566
+ "include_colab_link": true
567
+ },
568
+ "kernelspec": {
569
+ "display_name": "Python 3",
570
+ "name": "python3"
571
+ },
572
+ "language_info": {
573
+ "codemirror_mode": {
574
+ "name": "ipython",
575
+ "version": 3
576
+ },
577
+ "file_extension": ".py",
578
+ "mimetype": "text/x-python",
579
+ "name": "python",
580
+ "nbconvert_exporter": "python",
581
+ "pygments_lexer": "ipython3",
582
+ "version": "3.12.4"
583
+ },
584
+ "widgets": {
585
+ "application/vnd.jupyter.widget-state+json": {
586
+ "be409ad8ca7e42b2aac87e193d55d116": {
587
+ "model_module": "@jupyter-widgets/controls",
588
+ "model_name": "HBoxModel",
589
+ "model_module_version": "1.5.0",
590
+ "state": {
591
+ "_dom_classes": [],
592
+ "_model_module": "@jupyter-widgets/controls",
593
+ "_model_module_version": "1.5.0",
594
+ "_model_name": "HBoxModel",
595
+ "_view_count": null,
596
+ "_view_module": "@jupyter-widgets/controls",
597
+ "_view_module_version": "1.5.0",
598
+ "_view_name": "HBoxView",
599
+ "box_style": "",
600
+ "children": [
601
+ "IPY_MODEL_2985c54fc3834d8599323f52075a01a6",
602
+ "IPY_MODEL_a96efe0fc89e42748f1c37fdc000056b",
603
+ "IPY_MODEL_29bbfc318ffd4a8e9452960f0f2ccb8d"
604
+ ],
605
+ "layout": "IPY_MODEL_393c4f0d140c4259add663bf43767cbb"
606
+ }
607
+ },
608
+ "2985c54fc3834d8599323f52075a01a6": {
609
+ "model_module": "@jupyter-widgets/controls",
610
+ "model_name": "HTMLModel",
611
+ "model_module_version": "1.5.0",
612
+ "state": {
613
+ "_dom_classes": [],
614
+ "_model_module": "@jupyter-widgets/controls",
615
+ "_model_module_version": "1.5.0",
616
+ "_model_name": "HTMLModel",
617
+ "_view_count": null,
618
+ "_view_module": "@jupyter-widgets/controls",
619
+ "_view_module_version": "1.5.0",
620
+ "_view_name": "HTMLView",
621
+ "description": "",
622
+ "description_tooltip": null,
623
+ "layout": "IPY_MODEL_77a5354e5209441bb6a69b71f96a2102",
624
+ "placeholder": "​",
625
+ "style": "IPY_MODEL_4d682a386d1146cf828470083fba1fe6",
626
+ "value": "Parsing nodes: 100%"
627
+ }
628
+ },
629
+ "a96efe0fc89e42748f1c37fdc000056b": {
630
+ "model_module": "@jupyter-widgets/controls",
631
+ "model_name": "FloatProgressModel",
632
+ "model_module_version": "1.5.0",
633
+ "state": {
634
+ "_dom_classes": [],
635
+ "_model_module": "@jupyter-widgets/controls",
636
+ "_model_module_version": "1.5.0",
637
+ "_model_name": "FloatProgressModel",
638
+ "_view_count": null,
639
+ "_view_module": "@jupyter-widgets/controls",
640
+ "_view_module_version": "1.5.0",
641
+ "_view_name": "ProgressView",
642
+ "bar_style": "success",
643
+ "description": "",
644
+ "description_tooltip": null,
645
+ "layout": "IPY_MODEL_cf7bcdd679b9462285c619966a49f6d1",
646
+ "max": 335,
647
+ "min": 0,
648
+ "orientation": "horizontal",
649
+ "style": "IPY_MODEL_2f0ec2b1e52d441ca835deb88cb9349f",
650
+ "value": 335
651
+ }
652
+ },
653
+ "29bbfc318ffd4a8e9452960f0f2ccb8d": {
654
+ "model_module": "@jupyter-widgets/controls",
655
+ "model_name": "HTMLModel",
656
+ "model_module_version": "1.5.0",
657
+ "state": {
658
+ "_dom_classes": [],
659
+ "_model_module": "@jupyter-widgets/controls",
660
+ "_model_module_version": "1.5.0",
661
+ "_model_name": "HTMLModel",
662
+ "_view_count": null,
663
+ "_view_module": "@jupyter-widgets/controls",
664
+ "_view_module_version": "1.5.0",
665
+ "_view_name": "HTMLView",
666
+ "description": "",
667
+ "description_tooltip": null,
668
+ "layout": "IPY_MODEL_555508eb2f8c4caf81b623a8c157e742",
669
+ "placeholder": "​",
670
+ "style": "IPY_MODEL_8035840c130f4804b9da0958d23713bc",
671
+ "value": " 335/335 [00:00&lt;00:00, 599.37it/s]"
672
+ }
673
+ },
674
+ "393c4f0d140c4259add663bf43767cbb": {
675
+ "model_module": "@jupyter-widgets/base",
676
+ "model_name": "LayoutModel",
677
+ "model_module_version": "1.2.0",
678
+ "state": {
679
+ "_model_module": "@jupyter-widgets/base",
680
+ "_model_module_version": "1.2.0",
681
+ "_model_name": "LayoutModel",
682
+ "_view_count": null,
683
+ "_view_module": "@jupyter-widgets/base",
684
+ "_view_module_version": "1.2.0",
685
+ "_view_name": "LayoutView",
686
+ "align_content": null,
687
+ "align_items": null,
688
+ "align_self": null,
689
+ "border": null,
690
+ "bottom": null,
691
+ "display": null,
692
+ "flex": null,
693
+ "flex_flow": null,
694
+ "grid_area": null,
695
+ "grid_auto_columns": null,
696
+ "grid_auto_flow": null,
697
+ "grid_auto_rows": null,
698
+ "grid_column": null,
699
+ "grid_gap": null,
700
+ "grid_row": null,
701
+ "grid_template_areas": null,
702
+ "grid_template_columns": null,
703
+ "grid_template_rows": null,
704
+ "height": null,
705
+ "justify_content": null,
706
+ "justify_items": null,
707
+ "left": null,
708
+ "margin": null,
709
+ "max_height": null,
710
+ "max_width": null,
711
+ "min_height": null,
712
+ "min_width": null,
713
+ "object_fit": null,
714
+ "object_position": null,
715
+ "order": null,
716
+ "overflow": null,
717
+ "overflow_x": null,
718
+ "overflow_y": null,
719
+ "padding": null,
720
+ "right": null,
721
+ "top": null,
722
+ "visibility": null,
723
+ "width": null
724
+ }
725
+ },
726
+ "77a5354e5209441bb6a69b71f96a2102": {
727
+ "model_module": "@jupyter-widgets/base",
728
+ "model_name": "LayoutModel",
729
+ "model_module_version": "1.2.0",
730
+ "state": {
731
+ "_model_module": "@jupyter-widgets/base",
732
+ "_model_module_version": "1.2.0",
733
+ "_model_name": "LayoutModel",
734
+ "_view_count": null,
735
+ "_view_module": "@jupyter-widgets/base",
736
+ "_view_module_version": "1.2.0",
737
+ "_view_name": "LayoutView",
738
+ "align_content": null,
739
+ "align_items": null,
740
+ "align_self": null,
741
+ "border": null,
742
+ "bottom": null,
743
+ "display": null,
744
+ "flex": null,
745
+ "flex_flow": null,
746
+ "grid_area": null,
747
+ "grid_auto_columns": null,
748
+ "grid_auto_flow": null,
749
+ "grid_auto_rows": null,
750
+ "grid_column": null,
751
+ "grid_gap": null,
752
+ "grid_row": null,
753
+ "grid_template_areas": null,
754
+ "grid_template_columns": null,
755
+ "grid_template_rows": null,
756
+ "height": null,
757
+ "justify_content": null,
758
+ "justify_items": null,
759
+ "left": null,
760
+ "margin": null,
761
+ "max_height": null,
762
+ "max_width": null,
763
+ "min_height": null,
764
+ "min_width": null,
765
+ "object_fit": null,
766
+ "object_position": null,
767
+ "order": null,
768
+ "overflow": null,
769
+ "overflow_x": null,
770
+ "overflow_y": null,
771
+ "padding": null,
772
+ "right": null,
773
+ "top": null,
774
+ "visibility": null,
775
+ "width": null
776
+ }
777
+ },
778
+ "4d682a386d1146cf828470083fba1fe6": {
779
+ "model_module": "@jupyter-widgets/controls",
780
+ "model_name": "DescriptionStyleModel",
781
+ "model_module_version": "1.5.0",
782
+ "state": {
783
+ "_model_module": "@jupyter-widgets/controls",
784
+ "_model_module_version": "1.5.0",
785
+ "_model_name": "DescriptionStyleModel",
786
+ "_view_count": null,
787
+ "_view_module": "@jupyter-widgets/base",
788
+ "_view_module_version": "1.2.0",
789
+ "_view_name": "StyleView",
790
+ "description_width": ""
791
+ }
792
+ },
793
+ "cf7bcdd679b9462285c619966a49f6d1": {
794
+ "model_module": "@jupyter-widgets/base",
795
+ "model_name": "LayoutModel",
796
+ "model_module_version": "1.2.0",
797
+ "state": {
798
+ "_model_module": "@jupyter-widgets/base",
799
+ "_model_module_version": "1.2.0",
800
+ "_model_name": "LayoutModel",
801
+ "_view_count": null,
802
+ "_view_module": "@jupyter-widgets/base",
803
+ "_view_module_version": "1.2.0",
804
+ "_view_name": "LayoutView",
805
+ "align_content": null,
806
+ "align_items": null,
807
+ "align_self": null,
808
+ "border": null,
809
+ "bottom": null,
810
+ "display": null,
811
+ "flex": null,
812
+ "flex_flow": null,
813
+ "grid_area": null,
814
+ "grid_auto_columns": null,
815
+ "grid_auto_flow": null,
816
+ "grid_auto_rows": null,
817
+ "grid_column": null,
818
+ "grid_gap": null,
819
+ "grid_row": null,
820
+ "grid_template_areas": null,
821
+ "grid_template_columns": null,
822
+ "grid_template_rows": null,
823
+ "height": null,
824
+ "justify_content": null,
825
+ "justify_items": null,
826
+ "left": null,
827
+ "margin": null,
828
+ "max_height": null,
829
+ "max_width": null,
830
+ "min_height": null,
831
+ "min_width": null,
832
+ "object_fit": null,
833
+ "object_position": null,
834
+ "order": null,
835
+ "overflow": null,
836
+ "overflow_x": null,
837
+ "overflow_y": null,
838
+ "padding": null,
839
+ "right": null,
840
+ "top": null,
841
+ "visibility": null,
842
+ "width": null
843
+ }
844
+ },
845
+ "2f0ec2b1e52d441ca835deb88cb9349f": {
846
+ "model_module": "@jupyter-widgets/controls",
847
+ "model_name": "ProgressStyleModel",
848
+ "model_module_version": "1.5.0",
849
+ "state": {
850
+ "_model_module": "@jupyter-widgets/controls",
851
+ "_model_module_version": "1.5.0",
852
+ "_model_name": "ProgressStyleModel",
853
+ "_view_count": null,
854
+ "_view_module": "@jupyter-widgets/base",
855
+ "_view_module_version": "1.2.0",
856
+ "_view_name": "StyleView",
857
+ "bar_color": null,
858
+ "description_width": ""
859
+ }
860
+ },
861
+ "555508eb2f8c4caf81b623a8c157e742": {
862
+ "model_module": "@jupyter-widgets/base",
863
+ "model_name": "LayoutModel",
864
+ "model_module_version": "1.2.0",
865
+ "state": {
866
+ "_model_module": "@jupyter-widgets/base",
867
+ "_model_module_version": "1.2.0",
868
+ "_model_name": "LayoutModel",
869
+ "_view_count": null,
870
+ "_view_module": "@jupyter-widgets/base",
871
+ "_view_module_version": "1.2.0",
872
+ "_view_name": "LayoutView",
873
+ "align_content": null,
874
+ "align_items": null,
875
+ "align_self": null,
876
+ "border": null,
877
+ "bottom": null,
878
+ "display": null,
879
+ "flex": null,
880
+ "flex_flow": null,
881
+ "grid_area": null,
882
+ "grid_auto_columns": null,
883
+ "grid_auto_flow": null,
884
+ "grid_auto_rows": null,
885
+ "grid_column": null,
886
+ "grid_gap": null,
887
+ "grid_row": null,
888
+ "grid_template_areas": null,
889
+ "grid_template_columns": null,
890
+ "grid_template_rows": null,
891
+ "height": null,
892
+ "justify_content": null,
893
+ "justify_items": null,
894
+ "left": null,
895
+ "margin": null,
896
+ "max_height": null,
897
+ "max_width": null,
898
+ "min_height": null,
899
+ "min_width": null,
900
+ "object_fit": null,
901
+ "object_position": null,
902
+ "order": null,
903
+ "overflow": null,
904
+ "overflow_x": null,
905
+ "overflow_y": null,
906
+ "padding": null,
907
+ "right": null,
908
+ "top": null,
909
+ "visibility": null,
910
+ "width": null
911
+ }
912
+ },
913
+ "8035840c130f4804b9da0958d23713bc": {
914
+ "model_module": "@jupyter-widgets/controls",
915
+ "model_name": "DescriptionStyleModel",
916
+ "model_module_version": "1.5.0",
917
+ "state": {
918
+ "_model_module": "@jupyter-widgets/controls",
919
+ "_model_module_version": "1.5.0",
920
+ "_model_name": "DescriptionStyleModel",
921
+ "_view_count": null,
922
+ "_view_module": "@jupyter-widgets/base",
923
+ "_view_module_version": "1.2.0",
924
+ "_view_name": "StyleView",
925
+ "description_width": ""
926
+ }
927
+ },
928
+ "6d93958f663f48b4922a9524efb70e91": {
929
+ "model_module": "@jupyter-widgets/controls",
930
+ "model_name": "HBoxModel",
931
+ "model_module_version": "1.5.0",
932
+ "state": {
933
+ "_dom_classes": [],
934
+ "_model_module": "@jupyter-widgets/controls",
935
+ "_model_module_version": "1.5.0",
936
+ "_model_name": "HBoxModel",
937
+ "_view_count": null,
938
+ "_view_module": "@jupyter-widgets/controls",
939
+ "_view_module_version": "1.5.0",
940
+ "_view_name": "HBoxView",
941
+ "box_style": "",
942
+ "children": [
943
+ "IPY_MODEL_4b42d724b989497faee4836a1e2dda70",
944
+ "IPY_MODEL_cbe2e4a95f2e412f83fed16bc5db08ad",
945
+ "IPY_MODEL_1f3d664867634613a30281f61ab33ac7"
946
+ ],
947
+ "layout": "IPY_MODEL_aab9626f226c4c83908c3b042d6e4bdb"
948
+ }
949
+ },
950
+ "4b42d724b989497faee4836a1e2dda70": {
951
+ "model_module": "@jupyter-widgets/controls",
952
+ "model_name": "HTMLModel",
953
+ "model_module_version": "1.5.0",
954
+ "state": {
955
+ "_dom_classes": [],
956
+ "_model_module": "@jupyter-widgets/controls",
957
+ "_model_module_version": "1.5.0",
958
+ "_model_name": "HTMLModel",
959
+ "_view_count": null,
960
+ "_view_module": "@jupyter-widgets/controls",
961
+ "_view_module_version": "1.5.0",
962
+ "_view_name": "HTMLView",
963
+ "description": "",
964
+ "description_tooltip": null,
965
+ "layout": "IPY_MODEL_eecd42e03f4d484c87032c25df7570b3",
966
+ "placeholder": "​",
967
+ "style": "IPY_MODEL_1956d41b8b9540c99fb9b4a4df7bbaa2",
968
+ "value": "Generating embeddings: 100%"
969
+ }
970
+ },
971
+ "cbe2e4a95f2e412f83fed16bc5db08ad": {
972
+ "model_module": "@jupyter-widgets/controls",
973
+ "model_name": "FloatProgressModel",
974
+ "model_module_version": "1.5.0",
975
+ "state": {
976
+ "_dom_classes": [],
977
+ "_model_module": "@jupyter-widgets/controls",
978
+ "_model_module_version": "1.5.0",
979
+ "_model_name": "FloatProgressModel",
980
+ "_view_count": null,
981
+ "_view_module": "@jupyter-widgets/controls",
982
+ "_view_module_version": "1.5.0",
983
+ "_view_name": "ProgressView",
984
+ "bar_style": "success",
985
+ "description": "",
986
+ "description_tooltip": null,
987
+ "layout": "IPY_MODEL_d800ddbadddd48ecbbaf0dd39035d275",
988
+ "max": 335,
989
+ "min": 0,
990
+ "orientation": "horizontal",
991
+ "style": "IPY_MODEL_9d58bc10ef844753a17505aca55e079a",
992
+ "value": 335
993
+ }
994
+ },
995
+ "1f3d664867634613a30281f61ab33ac7": {
996
+ "model_module": "@jupyter-widgets/controls",
997
+ "model_name": "HTMLModel",
998
+ "model_module_version": "1.5.0",
999
+ "state": {
1000
+ "_dom_classes": [],
1001
+ "_model_module": "@jupyter-widgets/controls",
1002
+ "_model_module_version": "1.5.0",
1003
+ "_model_name": "HTMLModel",
1004
+ "_view_count": null,
1005
+ "_view_module": "@jupyter-widgets/controls",
1006
+ "_view_module_version": "1.5.0",
1007
+ "_view_name": "HTMLView",
1008
+ "description": "",
1009
+ "description_tooltip": null,
1010
+ "layout": "IPY_MODEL_91ca1a302884473f8314c097c41d03fd",
1011
+ "placeholder": "​",
1012
+ "style": "IPY_MODEL_b71c84b4ca3443d29d650bb8ea0f5458",
1013
+ "value": " 335/335 [00:05&lt;00:00, 58.54it/s]"
1014
+ }
1015
+ },
1016
+ "aab9626f226c4c83908c3b042d6e4bdb": {
1017
+ "model_module": "@jupyter-widgets/base",
1018
+ "model_name": "LayoutModel",
1019
+ "model_module_version": "1.2.0",
1020
+ "state": {
1021
+ "_model_module": "@jupyter-widgets/base",
1022
+ "_model_module_version": "1.2.0",
1023
+ "_model_name": "LayoutModel",
1024
+ "_view_count": null,
1025
+ "_view_module": "@jupyter-widgets/base",
1026
+ "_view_module_version": "1.2.0",
1027
+ "_view_name": "LayoutView",
1028
+ "align_content": null,
1029
+ "align_items": null,
1030
+ "align_self": null,
1031
+ "border": null,
1032
+ "bottom": null,
1033
+ "display": null,
1034
+ "flex": null,
1035
+ "flex_flow": null,
1036
+ "grid_area": null,
1037
+ "grid_auto_columns": null,
1038
+ "grid_auto_flow": null,
1039
+ "grid_auto_rows": null,
1040
+ "grid_column": null,
1041
+ "grid_gap": null,
1042
+ "grid_row": null,
1043
+ "grid_template_areas": null,
1044
+ "grid_template_columns": null,
1045
+ "grid_template_rows": null,
1046
+ "height": null,
1047
+ "justify_content": null,
1048
+ "justify_items": null,
1049
+ "left": null,
1050
+ "margin": null,
1051
+ "max_height": null,
1052
+ "max_width": null,
1053
+ "min_height": null,
1054
+ "min_width": null,
1055
+ "object_fit": null,
1056
+ "object_position": null,
1057
+ "order": null,
1058
+ "overflow": null,
1059
+ "overflow_x": null,
1060
+ "overflow_y": null,
1061
+ "padding": null,
1062
+ "right": null,
1063
+ "top": null,
1064
+ "visibility": null,
1065
+ "width": null
1066
+ }
1067
+ },
1068
+ "eecd42e03f4d484c87032c25df7570b3": {
1069
+ "model_module": "@jupyter-widgets/base",
1070
+ "model_name": "LayoutModel",
1071
+ "model_module_version": "1.2.0",
1072
+ "state": {
1073
+ "_model_module": "@jupyter-widgets/base",
1074
+ "_model_module_version": "1.2.0",
1075
+ "_model_name": "LayoutModel",
1076
+ "_view_count": null,
1077
+ "_view_module": "@jupyter-widgets/base",
1078
+ "_view_module_version": "1.2.0",
1079
+ "_view_name": "LayoutView",
1080
+ "align_content": null,
1081
+ "align_items": null,
1082
+ "align_self": null,
1083
+ "border": null,
1084
+ "bottom": null,
1085
+ "display": null,
1086
+ "flex": null,
1087
+ "flex_flow": null,
1088
+ "grid_area": null,
1089
+ "grid_auto_columns": null,
1090
+ "grid_auto_flow": null,
1091
+ "grid_auto_rows": null,
1092
+ "grid_column": null,
1093
+ "grid_gap": null,
1094
+ "grid_row": null,
1095
+ "grid_template_areas": null,
1096
+ "grid_template_columns": null,
1097
+ "grid_template_rows": null,
1098
+ "height": null,
1099
+ "justify_content": null,
1100
+ "justify_items": null,
1101
+ "left": null,
1102
+ "margin": null,
1103
+ "max_height": null,
1104
+ "max_width": null,
1105
+ "min_height": null,
1106
+ "min_width": null,
1107
+ "object_fit": null,
1108
+ "object_position": null,
1109
+ "order": null,
1110
+ "overflow": null,
1111
+ "overflow_x": null,
1112
+ "overflow_y": null,
1113
+ "padding": null,
1114
+ "right": null,
1115
+ "top": null,
1116
+ "visibility": null,
1117
+ "width": null
1118
+ }
1119
+ },
1120
+ "1956d41b8b9540c99fb9b4a4df7bbaa2": {
1121
+ "model_module": "@jupyter-widgets/controls",
1122
+ "model_name": "DescriptionStyleModel",
1123
+ "model_module_version": "1.5.0",
1124
+ "state": {
1125
+ "_model_module": "@jupyter-widgets/controls",
1126
+ "_model_module_version": "1.5.0",
1127
+ "_model_name": "DescriptionStyleModel",
1128
+ "_view_count": null,
1129
+ "_view_module": "@jupyter-widgets/base",
1130
+ "_view_module_version": "1.2.0",
1131
+ "_view_name": "StyleView",
1132
+ "description_width": ""
1133
+ }
1134
+ },
1135
+ "d800ddbadddd48ecbbaf0dd39035d275": {
1136
+ "model_module": "@jupyter-widgets/base",
1137
+ "model_name": "LayoutModel",
1138
+ "model_module_version": "1.2.0",
1139
+ "state": {
1140
+ "_model_module": "@jupyter-widgets/base",
1141
+ "_model_module_version": "1.2.0",
1142
+ "_model_name": "LayoutModel",
1143
+ "_view_count": null,
1144
+ "_view_module": "@jupyter-widgets/base",
1145
+ "_view_module_version": "1.2.0",
1146
+ "_view_name": "LayoutView",
1147
+ "align_content": null,
1148
+ "align_items": null,
1149
+ "align_self": null,
1150
+ "border": null,
1151
+ "bottom": null,
1152
+ "display": null,
1153
+ "flex": null,
1154
+ "flex_flow": null,
1155
+ "grid_area": null,
1156
+ "grid_auto_columns": null,
1157
+ "grid_auto_flow": null,
1158
+ "grid_auto_rows": null,
1159
+ "grid_column": null,
1160
+ "grid_gap": null,
1161
+ "grid_row": null,
1162
+ "grid_template_areas": null,
1163
+ "grid_template_columns": null,
1164
+ "grid_template_rows": null,
1165
+ "height": null,
1166
+ "justify_content": null,
1167
+ "justify_items": null,
1168
+ "left": null,
1169
+ "margin": null,
1170
+ "max_height": null,
1171
+ "max_width": null,
1172
+ "min_height": null,
1173
+ "min_width": null,
1174
+ "object_fit": null,
1175
+ "object_position": null,
1176
+ "order": null,
1177
+ "overflow": null,
1178
+ "overflow_x": null,
1179
+ "overflow_y": null,
1180
+ "padding": null,
1181
+ "right": null,
1182
+ "top": null,
1183
+ "visibility": null,
1184
+ "width": null
1185
+ }
1186
+ },
1187
+ "9d58bc10ef844753a17505aca55e079a": {
1188
+ "model_module": "@jupyter-widgets/controls",
1189
+ "model_name": "ProgressStyleModel",
1190
+ "model_module_version": "1.5.0",
1191
+ "state": {
1192
+ "_model_module": "@jupyter-widgets/controls",
1193
+ "_model_module_version": "1.5.0",
1194
+ "_model_name": "ProgressStyleModel",
1195
+ "_view_count": null,
1196
+ "_view_module": "@jupyter-widgets/base",
1197
+ "_view_module_version": "1.2.0",
1198
+ "_view_name": "StyleView",
1199
+ "bar_color": null,
1200
+ "description_width": ""
1201
+ }
1202
+ },
1203
+ "91ca1a302884473f8314c097c41d03fd": {
1204
+ "model_module": "@jupyter-widgets/base",
1205
+ "model_name": "LayoutModel",
1206
+ "model_module_version": "1.2.0",
1207
+ "state": {
1208
+ "_model_module": "@jupyter-widgets/base",
1209
+ "_model_module_version": "1.2.0",
1210
+ "_model_name": "LayoutModel",
1211
+ "_view_count": null,
1212
+ "_view_module": "@jupyter-widgets/base",
1213
+ "_view_module_version": "1.2.0",
1214
+ "_view_name": "LayoutView",
1215
+ "align_content": null,
1216
+ "align_items": null,
1217
+ "align_self": null,
1218
+ "border": null,
1219
+ "bottom": null,
1220
+ "display": null,
1221
+ "flex": null,
1222
+ "flex_flow": null,
1223
+ "grid_area": null,
1224
+ "grid_auto_columns": null,
1225
+ "grid_auto_flow": null,
1226
+ "grid_auto_rows": null,
1227
+ "grid_column": null,
1228
+ "grid_gap": null,
1229
+ "grid_row": null,
1230
+ "grid_template_areas": null,
1231
+ "grid_template_columns": null,
1232
+ "grid_template_rows": null,
1233
+ "height": null,
1234
+ "justify_content": null,
1235
+ "justify_items": null,
1236
+ "left": null,
1237
+ "margin": null,
1238
+ "max_height": null,
1239
+ "max_width": null,
1240
+ "min_height": null,
1241
+ "min_width": null,
1242
+ "object_fit": null,
1243
+ "object_position": null,
1244
+ "order": null,
1245
+ "overflow": null,
1246
+ "overflow_x": null,
1247
+ "overflow_y": null,
1248
+ "padding": null,
1249
+ "right": null,
1250
+ "top": null,
1251
+ "visibility": null,
1252
+ "width": null
1253
+ }
1254
+ },
1255
+ "b71c84b4ca3443d29d650bb8ea0f5458": {
1256
+ "model_module": "@jupyter-widgets/controls",
1257
+ "model_name": "DescriptionStyleModel",
1258
+ "model_module_version": "1.5.0",
1259
+ "state": {
1260
+ "_model_module": "@jupyter-widgets/controls",
1261
+ "_model_module_version": "1.5.0",
1262
+ "_model_name": "DescriptionStyleModel",
1263
+ "_view_count": null,
1264
+ "_view_module": "@jupyter-widgets/base",
1265
+ "_view_module_version": "1.2.0",
1266
+ "_view_name": "StyleView",
1267
+ "description_width": ""
1268
+ }
1269
+ }
1270
+ }
1271
+ }
1272
+ },
1273
+ "nbformat": 4,
1274
+ "nbformat_minor": 0
1275
+ }