farhananis005 commited on
Commit
bc48646
1 Parent(s): daaf4e6

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +197 -0
  3. docs_db/index.faiss +3 -0
  4. docs_db/index.pkl +3 -0
  5. requirements.txt +10 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docs_db/index.faiss filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import openai

# Silence the HuggingFace tokenizers fork-safety warning in the hosted Space.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Bare lookup: fails fast with KeyError at import time if the key is unset.
# The value itself is read implicitly by the openai/langchain clients.
os.environ["OPENAI_API_KEY"]
6
+
7
+
8
def save_docs(docs, output_dir="/home/user/app/docs/"):
    """Copy uploaded files into *output_dir*, replacing any previous contents.

    Args:
        docs: iterable of uploaded file objects; each must expose a ``.name``
            attribute holding its filesystem path (Gradio upload objects do).
        output_dir: destination directory; wiped and recreated on every call.
            Defaults to the app's document folder, so existing callers are
            unaffected.

    Returns:
        The status string "Successful!".
    """
    import os
    import shutil

    # Start from a clean slate so stale documents never leak into indexing.
    # (The original re-checked existence right after rmtree, which is
    # always false at that point — a plain makedirs suffices.)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    for doc in docs:
        shutil.copy(doc.name, output_dir)

    return "Successful!"
25
+
26
+
27
def process_docs():
    """Index every supported document under /home/user/app/docs/.

    Loads .pdf, .txt, .docx, .csv and .xlsx files, splits them into
    overlapping chunks, embeds them with OpenAI embeddings, and persists a
    FAISS index to /home/user/app/docs_db/.

    Returns:
        The status string "Successful!".
    """
    from langchain.document_loaders import (
        DirectoryLoader,
        Docx2txtLoader,
        PyPDFLoader,
        TextLoader,
        UnstructuredExcelLoader,
    )
    from langchain.document_loaders.csv_loader import CSVLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings

    docs_dir = "/home/user/app/docs/"

    # One (glob, loader) pair per supported file type — replaces five
    # copy-pasted loader stanzas with a single data-driven loop.
    loader_specs = [
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
        ("./*.csv", CSVLoader),
        ("./*.xlsx", UnstructuredExcelLoader),
    ]

    documents = []
    for glob_pattern, loader_cls in loader_specs:
        loader = DirectoryLoader(docs_dir, glob=glob_pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    # 1000-char chunks with 200-char overlap keep semantic continuity
    # across chunk boundaries for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, length_function=len
    )
    docs = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.from_documents(docs, embeddings)
    docs_db.save_local("/home/user/app/docs_db/")

    return "Successful!"
80
+
81
+
82
# NOTE(review): `global` at module scope is a no-op in Python; the
# ConversationChain is actually created and bound by create_agent() below.
# Kept only as a signal that `agent` is intended as module-level shared state.
global agent
83
+
84
+
85
def create_agent():
    """Build the conversational agent and publish it module-wide.

    Creates a ChatOpenAI-backed ConversationChain with summarising buffer
    memory and binds it to the module-level ``agent`` name used by
    search_docs().

    Returns:
        The status string "Successful!".
    """
    from langchain.chains import ConversationChain
    from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
    from langchain_openai import ChatOpenAI

    global agent

    chat_llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k")
    summary_memory = ConversationSummaryBufferMemory(llm=chat_llm, max_token_limit=1000)
    agent = ConversationChain(llm=chat_llm, memory=summary_memory, verbose=True)

    return "Successful!"
98
+
99
+
100
def formatted_response(docs, question, response, state):
    """Append source attributions to *response* and record the exchange.

    Args:
        docs: retrieved documents; each exposes a ``metadata`` dict that may
            contain "source" (a file path) and "page" (a page number).
        question: the user's question.
        response: the model's answer text.
        state: chat history as a list of (question, answer) tuples; mutated
            in place.

    Returns:
        ``(state, state)`` — duplicated so Gradio can feed both the Chatbot
        component and its State holder from one callback.
    """
    # Build the lines in a list and join once, instead of repeated string
    # concatenation; the original also enumerated the docs but never used
    # the index.
    lines = [response + "\n\nSources"]
    for doc in docs:
        source_info = doc.metadata.get("source", "Unknown source")
        page_info = doc.metadata.get("page", None)

        # Display only the file name, not the full path.
        doc_name = source_info.split("/")[-1].strip()

        if page_info is not None:
            lines.append(f"{doc_name}\tpage no {page_info}")
        else:
            lines.append(doc_name)

    state.append((question, "\n".join(lines)))
    return state, state
117
+
118
+
119
def search_docs(prompt, question, state):
    """Answer *question* via similarity search plus the global agent.

    Args:
        prompt: custom instruction text prepended to the model input.
        question: the user's question.
        state: chat history (list of (question, answer) tuples) or None.

    Returns:
        ``(state, state)`` from formatted_response(), updated with this
        exchange.

    Raises:
        NameError: if create_agent() has not been called yet (the module
            never defines ``agent`` otherwise).
    """
    from langchain.callbacks import get_openai_callback
    from langchain.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings

    # The original also did `agent = agent` here — a pointless
    # self-assignment after `global agent`; removed.
    global agent

    state = state or []

    embeddings = OpenAIEmbeddings()
    # SECURITY NOTE: allow_dangerous_deserialization unpickles index.pkl.
    # Acceptable only because the index ships with this app and is never
    # user-supplied; do not point this at untrusted files.
    docs_db = FAISS.load_local(
        "/home/user/app/docs_db/", embeddings, allow_dangerous_deserialization=True
    )
    docs = docs_db.similarity_search(question)

    # Single join replaces the original chain of four `prompt +=` statements;
    # produces the identical "prompt\n\nquestion\n\ndocs" string.
    full_prompt = "\n\n".join([prompt, question, str(docs)])

    with get_openai_callback() as cb:
        response = agent.predict(input=full_prompt)
        print(cb)  # token/cost accounting for this call

    return formatted_response(docs, question, response, state)
146
+
147
+
148
import gradio as gr

# Center the main column and cap its width at 75% of the viewport.
css = """
.col{
    max-width: 75%;
    margin: 0 auto;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>Your AI Medical Assistant</center>")

    with gr.Tab("Your AI Medical Assistant"):
        with gr.Column(elem_classes="col"):

            with gr.Tab("Query Documents"):
                with gr.Column():
                    # Must be clicked first: builds the module-level agent
                    # that search_docs() relies on.
                    create_agent_button = gr.Button("Create Agent")
                    create_agent_output = gr.Textbox(label="Output")

                    # Free-form instructions prepended to every query.
                    docs_prompt_input = gr.Textbox(label="Custom Prompt")

                    docs_chatbot = gr.Chatbot(label="Chats")
                    # Holds the (question, answer) history between callbacks.
                    docs_state = gr.State()

                    docs_search_input = gr.Textbox(label="Question")
                    docs_search_button = gr.Button("Search")

                    gr.ClearButton(
                        [docs_prompt_input, docs_search_input, create_agent_output]
                    )

    #########################################################################################################

    create_agent_button.click(create_agent, inputs=None, outputs=create_agent_output)

    # search_docs returns (state, state): one copy renders in the chatbot,
    # the other persists in docs_state for the next call.
    docs_search_button.click(
        search_docs,
        inputs=[docs_prompt_input, docs_search_input, docs_state],
        outputs=[docs_chatbot, docs_state],
    )

    #########################################################################################################

demo.queue()
demo.launch()
docs_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27c801c816a9820b05d62beeace7c74b374f41c715998a8c7bfb7414f91042e
3
+ size 61538349
docs_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae13e975e65af60bddcb1fe944a39940da0cd02013c5164496557f78d77c2901
3
+ size 9052961
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-openai
3
+ PyPDF2
4
+ pypdf
5
+ docx2txt
6
+ unstructured
7
+ gradio
8
+ faiss-cpu
9
+ openai
10
+ tiktoken