Spaces:
Runtime error
Runtime error
Karthikeyan
commited on
Commit
·
95ce447
1
Parent(s):
3151c83
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import NoneStr
|
2 |
+
import os
|
3 |
+
from langchain.chains.question_answering import load_qa_chain
|
4 |
+
from langchain.document_loaders import UnstructuredFileLoader
|
5 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
6 |
+
from langchain.llms import OpenAI
|
7 |
+
from langchain.text_splitter import CharacterTextSplitter
|
8 |
+
from langchain.vectorstores import FAISS
|
9 |
+
from langchain.vectorstores import Chroma
|
10 |
+
from langchain.chains import ConversationalRetrievalChain
|
11 |
+
import gradio as gr
|
12 |
+
import openai
|
13 |
+
from langchain import PromptTemplate, OpenAI, LLMChain
|
14 |
+
import validators
|
15 |
+
import requests
|
16 |
+
import mimetypes
|
17 |
+
import tempfile
|
18 |
+
|
19 |
+
class Chatbot:
    """Gradio question-answering chatbot over uploaded files and fetched URLs.

    Loaded documents are split into chunks, embedded with ``OpenAIEmbeddings``
    and stored in a Chroma vector store. Questions are answered by a
    ``ConversationalRetrievalChain`` that retrieves from that store.
    """

    # Headers sent with every URL download (a browser-like User-Agent).
    _REQUEST_HEADERS = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    }

    # Seconds to wait on the remote server before giving up on a download.
    _REQUEST_TIMEOUT = 30

    # Reply shown when a question arrives before any document was indexed.
    _NO_KNOWLEDGE_BASE_MESSAGE = (
        "Please upload a file or submit a URL before asking a question."
    )

    def __init__(self):
        """Configure the OpenAI client from ``OPENAI_API_KEY``."""
        openai.api_key = os.getenv("OPENAI_API_KEY")

    def get_empty_state(self):
        """Return the initial session state: no knowledge base yet."""
        return {"knowledge_base": None}

    def create_knowledge_base(self, docs):
        """Create a knowledge base from the given documents.

        Args:
            docs: Documents as returned by a loader's ``load()`` call.

        Returns:
            The Chroma vector store built from the document chunks.
        """
        # Split on newlines into chunks of at most 1000 characters, with a
        # 200-character overlap between consecutive chunks.
        text_splitter = CharacterTextSplitter(
            separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
        )
        chunks = text_splitter.split_documents(docs)

        # Embed the chunks with OpenAI and index them in Chroma.
        embeddings = OpenAIEmbeddings()
        return Chroma.from_documents(chunks, embeddings)

    def _load_documents(self, paths):
        """Load every file in ``paths`` and return the combined documents."""
        docs = []
        for path in paths:
            docs.extend(UnstructuredFileLoader(path, strategy="fast").load())
        return docs

    @staticmethod
    def _guess_extension(content_type):
        """Map a Content-Type header value to a file extension, or ``None``.

        Parameters such as ``; charset=utf-8`` are stripped first because
        ``mimetypes.guess_extension`` only recognises the bare MIME type.
        """
        if not content_type:
            return None
        mime_type = content_type.split(";", 1)[0].strip()
        return mimetypes.guess_extension(mime_type)

    def upload_file(self, file_paths):
        """Build a knowledge base from uploaded files.

        Args:
            file_paths: Uploaded file objects; each exposes its path as
                ``.name``.

        Returns:
            tuple: The list of file paths and the new state dict holding
            the knowledge base.
        """
        file_paths = [uploaded.name for uploaded in file_paths]
        print(file_paths)

        knowledge_base = self.create_knowledge_base(self._load_documents(file_paths))
        return file_paths, {"knowledge_base": knowledge_base}

    def add_text(self, history, text):
        """Append the user's message to the chat and lock the input box.

        Returns:
            tuple: The extended history and a Gradio update that empties
            the textbox and makes it non-interactive.
        """
        history = history + [(text, None)]
        return history, gr.update(value="", interactive=False)

    def upload_multiple_urls(self, urls):
        """Download comma-separated URLs and build a knowledge base from them.

        Entries that are not valid URLs are skipped.

        Args:
            urls (str): Comma-separated list of URLs.

        Returns:
            tuple: The list of downloaded temp-file paths and the new state
            dict holding the knowledge base.

        Raises:
            ValueError: If a download returns a status code other than 200.
        """
        file_paths = []
        for url in (part.strip() for part in urls.split(',')):
            if not validators.url(url):
                continue

            r = requests.get(
                url, headers=self._REQUEST_HEADERS, timeout=self._REQUEST_TIMEOUT
            )
            if r.status_code != 200:
                raise ValueError(
                    "Check the url of your file; returned status code %s" % r.status_code
                )

            suffix = self._guess_extension(r.headers.get("content-type"))
            # Close the file before it is read back so that the content is
            # flushed to disk and the handle is released. delete=False keeps
            # the file around for the loader.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
                temp_file.write(r.content)
            file_paths.append(temp_file.name)

        knowledge_base = self.create_knowledge_base(self._load_documents(file_paths))
        return file_paths, {"knowledge_base": knowledge_base}

    def answer_question(self, question, history, state):
        """Answer the latest question in ``history`` from the knowledge base.

        Args:
            question: Ignored; the question is read from the last entry of
                ``history``.
            history: Chat history as (user, bot) pairs; the last pair holds
                the pending question.
            state (dict): Session state containing the knowledge base.

        Returns:
            The history with the last pair completed by the answer.
        """
        question = history[-1][0]

        # The state may be empty or hold None when nothing was indexed yet
        # (or after "Clear"); answer with a hint instead of crashing.
        knowledge_base = state.get("knowledge_base")
        if knowledge_base is None:
            history[-1] = [question, self._NO_KNOWLEDGE_BASE_MESSAGE]
            return history

        qa = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0.5),
            retriever=knowledge_base.as_retriever(),
            return_source_documents=False,
        )

        # Every earlier exchange is passed to the chain as conversation context.
        chat_history = [(human, ai) for human, ai in history[:-1]]
        result = qa({"question": question, "chat_history": chat_history})

        # Replace the entry instead of assigning into it so that tuple
        # entries work as well as list entries.
        history[-1] = [question, result["answer"]]
        return history

    def extract_excel_data(self, file_path):
        """Read an Excel file and return all cell values as one flat list.

        Values are listed row by row.
        """
        # Imported here because the module does not import pandas itself.
        import pandas as pd

        df = pd.read_excel(file_path)

        data_list = []
        for _, row in df.iterrows():
            data_list.extend(row.tolist())
        return data_list

    def comparing_chemicals(self, excel_file_path, chemicals):
        """Ask the completion model for chemicals common to both inputs.

        Args:
            excel_file_path: Uploaded Excel file object; its path is read
                from ``.name``.
            chemicals: Text listing the chemicals to compare.

        Returns:
            str: The model's reply with surrounding whitespace removed.
        """
        chemistry_capability = self.extract_excel_data(excel_file_path.name)
        prompt = (
            "Analyse the following text delimited by triple backticks "
            "to return the common chemicals.\n"
            f"text : ```{chemicals} {chemistry_capability}```.\n"
            "result should be in bullet points format.\n"
        )
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=100,
            n=1,
            stop=None,
            temperature=0,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0,
        )
        return response.choices[0].text.strip()

    def clear_function(self, state):
        """Reset the session state in place to the empty-knowledge-base shape."""
        state.clear()
        # Keep the key present so later lookups see "no knowledge base"
        # rather than a missing key.
        state["knowledge_base"] = None

    def gradio_interface(self):
        """Build and launch the Gradio UI for the multi URL and document chatbot."""
        with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
            state = gr.State(self.get_empty_state())
            with gr.Column(elem_id="col-container"):
                gr.HTML(
                    """<hr style="border-top: 5px solid white;">"""
                )
                gr.HTML(
                    """<br>
                    <h1 style="text-align:center;">
                    Multi URL and Doc Chatbot Q&A
                    </h1> """
                )
                gr.HTML(
                    """<hr style="border-top: 5px solid white;">"""
                )

                gr.Markdown("**Upload your file**")
                with gr.Row(elem_id="row-flex"):
                    with gr.Column(scale=1, min_width=0):
                        file_url = gr.Textbox(label='file url :', show_label=True, placeholder="")
                with gr.Row(elem_id="row-flex"):
                    with gr.Column(scale=0.90, min_width=160):
                        file_output = gr.File(elem_classes="heightfit")
                    with gr.Column(scale=0.10, min_width=160):
                        upload_button = gr.UploadButton(
                            "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"],
                            elem_classes="heightfit",
                            file_count="multiple")
                with gr.Row():
                    chatbot = gr.Chatbot([], elem_id="chatbot")
                with gr.Row():
                    txt = gr.Textbox(
                        label="Question",
                        show_label=True,
                        placeholder="Enter text and press enter, or upload an image",
                    )
                with gr.Row():
                    clear_btn = gr.Button(value="Clear")

            # Submitting a question: record it and lock the textbox, answer
            # it, then unlock the textbox again.
            txt_msg = txt.submit(self.add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
                self.answer_question, [txt, chatbot, state], chatbot
            )
            txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)

            file_url.submit(self.upload_multiple_urls, file_url, [file_output, state])
            # "Clear" resets both the knowledge base and the visible chat.
            clear_btn.click(self.clear_function, [state], [])
            clear_btn.click(lambda: None, None, chatbot, queue=False)
            upload_button.upload(self.upload_file, upload_button, [file_output, state])
        demo.queue().launch(debug=True)
|
242 |
+
|
243 |
+
# Script entry point: build the chatbot and start its Gradio UI.
if __name__ == "__main__":
    Chatbot().gradio_interface()
|