Sophia Yang committed on
Commit
97aaeb6
1 Parent(s): e5c24ed
Files changed (3) hide show
  1. Dockerfile +28 -0
  2. LangChain_QA_Panel_App.ipynb +239 -0
  3. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FROM python:3.9
2
+ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
3
+ LABEL maintainer="Hugging Face"
4
+ LABEL repository="transformers"
5
+
6
+ RUN apt update && \
7
+ apt install -y bash \
8
+ build-essential \
9
+ git \
10
+ curl \
11
+ ca-certificates \
12
+ python3 \
13
+ python3-pip && \
14
+ rm -rf /var/lib/apt/lists
15
+
16
+
17
+ WORKDIR /code
18
+
19
+ COPY ./requirements.txt /code/requirements.txt
20
+ RUN python3 -m pip install --no-cache-dir --upgrade pip
21
+ RUN python3 -m pip install --no-cache-dir --upgrade -r /code/requirements.txt
22
+
23
+ COPY . .
24
+
25
+ CMD ["panel", "serve", "/code/LangChain_QA_Panel_App.ipynb", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "sophiamyang-panel-pdf-qa.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
26
+
27
+ RUN mkdir /.cache
28
+ RUN chmod 777 /.cache
LangChain_QA_Panel_App.ipynb ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "id": "04815d1b-44ee-4bd3-878e-fa0c3bf9fa7f",
7
+ "metadata": {
8
+ "tags": []
9
+ },
10
+ "source": [
11
+ "# LangChain QA Panel App\n",
12
+ "\n",
13
+ "This notebook shows how to make this app:"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": null,
19
+ "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad",
20
+ "metadata": {
21
+ "tags": []
22
+ },
23
+ "outputs": [],
24
+ "source": [
25
+ "#!pip install langchain openai chromadb tiktoken pypdf panel\n"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5",
32
+ "metadata": {
33
+ "tags": []
34
+ },
35
+ "outputs": [],
36
+ "source": [
37
+ "import os \n",
38
+ "from langchain.chains import RetrievalQA\n",
39
+ "from langchain.llms import OpenAI\n",
40
+ "from langchain.document_loaders import TextLoader\n",
41
+ "from langchain.document_loaders import PyPDFLoader\n",
42
+ "from langchain.indexes import VectorstoreIndexCreator\n",
43
+ "from langchain.text_splitter import CharacterTextSplitter\n",
44
+ "from langchain.embeddings import OpenAIEmbeddings\n",
45
+ "from langchain.vectorstores import Chroma\n",
46
+ "import panel as pn\n",
47
+ "import tempfile\n"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": null,
53
+ "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73",
54
+ "metadata": {
55
+ "tags": []
56
+ },
57
+ "outputs": [],
58
+ "source": [
59
+ "pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
60
+ "pn.state.template.param.update(\n",
61
+ " main_max_width=\"690px\",\n",
62
+ " header_background=\"#F08080\",\n",
63
+ ")"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": null,
69
+ "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd",
70
+ "metadata": {
71
+ "tags": []
72
+ },
73
+ "outputs": [],
74
+ "source": [
75
+ "file_input = pn.widgets.FileInput(width=300)\n",
76
+ "\n",
77
+ "openaikey = pn.widgets.PasswordInput(\n",
78
+ " value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n",
79
+ ")\n",
80
+ "prompt = pn.widgets.TextEditor(\n",
81
+ " value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
82
+ ")\n",
83
+ "run_button = pn.widgets.Button(name=\"Run!\")\n",
84
+ "\n",
85
+ "select_k = pn.widgets.IntSlider(\n",
86
+ " name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
87
+ ")\n",
88
+ "select_chain_type = pn.widgets.RadioButtonGroup(\n",
89
+ " name='Chain type', \n",
90
+ " options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n",
91
+ ")\n",
92
+ "\n",
93
+ "widgets = pn.Row(\n",
94
+ " pn.Column(prompt, run_button, margin=5),\n",
95
+ " pn.Card(\n",
96
+ " \"Chain type:\",\n",
97
+ " pn.Column(select_chain_type, select_k),\n",
98
+ " title=\"Advanced settings\", margin=10\n",
99
+ " ), width=600\n",
100
+ ")"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "id": "9b83cc06-3401-498f-8f84-8a98370f3121",
107
+ "metadata": {
108
+ "tags": []
109
+ },
110
+ "outputs": [],
111
+ "source": [
112
+ "def qa(file, query, chain_type, k):\n",
113
+ " # load document\n",
114
+ " loader = PyPDFLoader(file)\n",
115
+ " documents = loader.load()\n",
116
+ " # split the documents into chunks\n",
117
+ " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
118
+ " texts = text_splitter.split_documents(documents)\n",
119
+ " # select which embeddings we want to use\n",
120
+ " embeddings = OpenAIEmbeddings()\n",
121
+ " # create the vectorstore to use as the index\n",
122
+ " db = Chroma.from_documents(texts, embeddings)\n",
123
+ " # expose this index in a retriever interface\n",
124
+ " retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
125
+ " # create a chain to answer questions \n",
126
+ " qa = RetrievalQA.from_chain_type(\n",
127
+ " llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
128
+ " result = qa({\"query\": query})\n",
129
+ " print(result['result'])\n",
130
+ " return result"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "id": "2722f43b-daf6-4d17-a842-41203ae9b140",
137
+ "metadata": {
138
+ "tags": []
139
+ },
140
+ "outputs": [],
141
+ "source": [
142
+ "# result = qa(\"example.pdf\", \"what is the total number of AI publications?\", chain_type=\"stuff\", k=2)"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824",
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "convos = [] # store all panel objects in a list\n",
153
+ "\n",
154
+ "def qa_result(_):\n",
155
+ " os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n",
156
+ " \n",
157
+ " # save pdf file to a temp file \n",
158
+ " if file_input.value is not None:\n",
159
+ " file_input.save(\"temp.pdf\")\n",
160
+ " \n",
161
+ " prompt_text = prompt.value\n",
162
+ " if prompt_text:\n",
163
+ " result = qa(file=\"temp.pdf\", query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n",
164
+ " convos.extend([\n",
165
+ " pn.Row(\n",
166
+ " pn.panel(\"\\U0001F60A\", width=10),\n",
167
+ " prompt_text,\n",
168
+ " width=600\n",
169
+ " ),\n",
170
+ " pn.Row(\n",
171
+ " pn.panel(\"\\U0001F916\", width=10),\n",
172
+ " pn.Column(\n",
173
+ " result[\"result\"],\n",
174
+ " \"Relevant source text:\",\n",
175
+ " pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
176
+ " )\n",
177
+ " )\n",
178
+ " ])\n",
179
+ " #return convos\n",
180
+ " return pn.Column(*convos, margin=15, width=575, min_height=400)\n"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": null,
186
+ "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c",
187
+ "metadata": {
188
+ "tags": []
189
+ },
190
+ "outputs": [],
191
+ "source": [
192
+ "qa_interactive = pn.panel(\n",
193
+ " pn.bind(qa_result, run_button),\n",
194
+ " loading_indicator=True\n",
195
+ ")"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": null,
201
+ "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58",
202
+ "metadata": {
203
+ "tags": []
204
+ },
205
+ "outputs": [],
206
+ "source": [
207
+ "# layout\n",
208
+ "pn.Column(\n",
209
+ " \"## \\U0001F60A! Question Answering with your PDF file\",\n",
210
+ " pn.Row(file_input,openaikey),\n",
211
+ " qa_interactive,\n",
212
+ " widgets\n",
213
+ "\n",
214
+ ").servable()"
215
+ ]
216
+ }
217
+ ],
218
+ "metadata": {
219
+ "kernelspec": {
220
+ "display_name": "Python 3 (ipykernel)",
221
+ "language": "python",
222
+ "name": "python3"
223
+ },
224
+ "language_info": {
225
+ "codemirror_mode": {
226
+ "name": "ipython",
227
+ "version": 3
228
+ },
229
+ "file_extension": ".py",
230
+ "mimetype": "text/x-python",
231
+ "name": "python",
232
+ "nbconvert_exporter": "python",
233
+ "pygments_lexer": "ipython3",
234
+ "version": "3.10.10"
235
+ }
236
+ },
237
+ "nbformat": 4,
238
+ "nbformat_minor": 5
239
+ }
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ chromadb
4
+ pypdf
5
+ panel
6
+ notebook