Commit ef01944 (parent: 6b347f1): Update app.py

app.py CHANGED
@@ -45,6 +45,24 @@ import os
 from langchain.llms import CTransformers
 import streamlit as st
 
+FILE_LOADER_MAPPING = {
+    ".csv": (CSVLoader, {"encoding": "utf-8"}),
+    ".doc": (UnstructuredWordDocumentLoader, {}),
+    ".docx": (UnstructuredWordDocumentLoader, {}),
+    ".enex": (EverNoteLoader, {}),
+    ".epub": (UnstructuredEPubLoader, {}),
+    ".html": (UnstructuredHTMLLoader, {}),
+    ".md": (UnstructuredMarkdownLoader, {}),
+    ".odt": (UnstructuredODTLoader, {}),
+    ".pdf": (PyPDFLoader, {}),
+    ".ppt": (UnstructuredPowerPointLoader, {}),
+    ".pptx": (UnstructuredPowerPointLoader, {}),
+    ".txt": (TextLoader, {"encoding": "utf8"}),
+    ".ipynb": (NotebookLoader, {}),
+    ".py": (PythonLoader, {}),
+    # Add more mappings for other file extensions and loaders as needed
+}
+
 def load_model():
     # model_path=HuggingFaceHub(repo_id="vilsonrodrigues/falcon-7b-instruct-sharded")
 
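The mapping above references loader classes that are not imported within the hunks shown in this diff. A minimal import sketch, assuming the legacy langchain.document_loaders module layout (the actual import lines in app.py are not visible here):

# Assumed imports for the loaders used in FILE_LOADER_MAPPING and load_document;
# adjust to whatever the installed langchain version actually exposes.
from langchain.document_loaders import (
    CSVLoader,
    EverNoteLoader,
    NotebookLoader,
    PyPDFLoader,
    PythonLoader,
    TextLoader,
    UnstructuredEPubLoader,
    UnstructuredFileLoader,
    UnstructuredHTMLLoader,
    UnstructuredMarkdownLoader,
    UnstructuredODTLoader,
    UnstructuredPowerPointLoader,
    UnstructuredWordDocumentLoader,
)
from langchain.document_loaders.base import BaseLoader
from langchain.schema import Document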
@@ -94,9 +112,24 @@ def load_model():
         # temperature=temperature, # type: ignore
     )
     return llm
-
 
-def create_vector_database(uploaded_files):
+def load_document(
+    file_path: str,
+    mapping: dict = FILE_LOADER_MAPPING,
+    default_loader: BaseLoader = UnstructuredFileLoader,
+) -> Document:
+    # Choose loader from mapping, load default if no match found
+    ext = "." + file_path.rsplit(".", 1)[-1]
+    if ext in mapping:
+        loader_class, loader_args = mapping[ext]
+        loader = loader_class(file_path, **loader_args)
+    else:
+        loader = default_loader(file_path)
+    loaded_documents = []
+    loaded_documents.extend(loader.load())
+    return loaded_documents
+
+def create_vector_database(loaded_documents):
     # DB_DIR: str = os.path.join(ABS_PATH, "db")
     """
     Creates a vector database using document loaders and embeddings.
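A short usage sketch for the new helper (the file names below are hypothetical). Note that load_document returns a list of Document objects even though its annotation says Document:

# Dispatch is driven by the file extension; unmapped extensions fall back to UnstructuredFileLoader.
docs = load_document("notes/report.pdf")    # resolves to PyPDFLoader
docs += load_document("notes/summary.md")   # resolves to UnstructuredMarkdownLoader
docs += load_document("notes/raw.log")      # unmapped extension, falls back to UnstructuredFileLoader
print(len(docs), docs[0].page_content[:80])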
@@ -107,21 +140,21 @@ def create_vector_database(uploaded_files):
 
     """
     # Initialize loaders for different file types
-    loaders = {
-        "pdf": UnstructuredPDFLoader,
-        "md": UnstructuredMarkdownLoader,
-        "txt": TextLoader,
-        "csv": CSVLoader,
-        "py": PythonLoader,
-        "epub": UnstructuredEPubLoader,
-        "html": UnstructuredHTMLLoader,
-        "ppt": UnstructuredPowerPointLoader,
-        "pptx": UnstructuredPowerPointLoader,
-        "doc": UnstructuredWordDocumentLoader,
-        "docx": UnstructuredWordDocumentLoader,
-        "odt": UnstructuredODTLoader,
-        "ipynb": NotebookLoader
-    }
+    # loaders = {
+    #     "pdf": UnstructuredPDFLoader,
+    #     "md": UnstructuredMarkdownLoader,
+    #     "txt": TextLoader,
+    #     "csv": CSVLoader,
+    #     "py": PythonLoader,
+    #     "epub": UnstructuredEPubLoader,
+    #     "html": UnstructuredHTMLLoader,
+    #     "ppt": UnstructuredPowerPointLoader,
+    #     "pptx": UnstructuredPowerPointLoader,
+    #     "doc": UnstructuredWordDocumentLoader,
+    #     "docx": UnstructuredWordDocumentLoader,
+    #     "odt": UnstructuredODTLoader,
+    #     "ipynb": NotebookLoader
+    # }
     # pdf_loader = DirectoryLoader("data/", glob="**/*.pdf", loader_cls=PyPDFLoader)
     # markdown_loader = DirectoryLoader("data/", glob="**/*.md", loader_cls=UnstructuredMarkdownLoader)
     # text_loader = DirectoryLoader("data/", glob="**/*.txt", loader_cls=TextLoader)
@@ -154,17 +187,17 @@ def create_vector_database(uploaded_files):
     # }
 
     # Load documents from uploaded files using the appropriate loaders
-    loaded_documents = []
-    for uploaded_file in uploaded_files:
-        # file_extension = os.path.splitext(uploaded_file.name)[-1].lower()[1:]
-        file_extension = os.path.splitext(uploaded_file.name)[-1][1:].lower()
-        if file_extension in loaders:
-            # Read the content of the uploaded file
-            file_content = uploaded_file.read()
+    # loaded_documents = []
+    # for uploaded_file in uploaded_files:
+    #     # file_extension = os.path.splitext(uploaded_file.name)[-1].lower()[1:]
+    #     file_extension = os.path.splitext(uploaded_file.name)[-1][1:].lower()
+    #     if file_extension in loaders:
+    #         # Read the content of the uploaded file
+    #         file_content = uploaded_file.read()
 
-            # Pass the content to the loader for processing
-            loader = loaders[file_extension](file_content)
-            loaded_documents.extend(loader.load())
+    #         # Pass the content to the loader for processing
+    #         loader = loaders[file_extension](file_content)
+    #         loaded_documents.extend(loader.load())
     # loader = loaders[file_extension](uploaded_file)
     # # loader = loader_cls.load(uploaded_file.name) # Pass the file path to the loader constructor
     # # # content = uploaded_file.read() # Read the file content
@@ -334,7 +367,8 @@ def main():
     llm = load_model()
     prompt = set_custom_prompt()
    CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()
-
+    loaded_documents = load_document(uploaded_files)
+    db = create_vector_database(loaded_documents)
     response = retrieve_bot_answer(query)
 
     # Display bot response
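One caveat: load_document expects a single file path, while main() passes uploaded_files (the Streamlit upload list) to it directly. A sketch of one way the two could be bridged, assuming uploaded_files comes from st.file_uploader; the helper below is illustrative only and not part of this commit:

import os
import tempfile

def load_uploaded_files(uploaded_files):
    """Persist Streamlit UploadedFile objects to disk, then reuse load_document."""
    loaded_documents = []
    for uploaded_file in uploaded_files:
        suffix = os.path.splitext(uploaded_file.name)[-1]
        # Write the in-memory upload to a temporary file so the path-based loaders can read it
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(uploaded_file.read())
            tmp_path = tmp.name
        try:
            loaded_documents.extend(load_document(tmp_path))
        finally:
            os.remove(tmp_path)
    return loaded_documents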