Spaces:
Runtime error
Runtime error
Commit
·
b62d7f8
1
Parent(s):
29f8c44
Update app.py
Browse files
app.py
CHANGED
@@ -135,6 +135,23 @@ def create_vector_database(uploaded_files):
|
|
135 |
# docx_loader = DirectoryLoader("data/", glob="**/*.docx", loader_cls=UnstructuredWordDocumentLoader)
|
136 |
# odt_loader = DirectoryLoader("data/", glob="**/*.odt", loader_cls=UnstructuredODTLoader)
|
137 |
# notebook_loader = DirectoryLoader("data/", glob="**/*.ipynb", loader_cls=NotebookLoader)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
# Load documents from uploaded files using the appropriate loaders
|
140 |
loaded_documents = []
|
@@ -142,10 +159,10 @@ def create_vector_database(uploaded_files):
|
|
142 |
# file_extension = os.path.splitext(uploaded_file.name)[-1].lower()[1:]
|
143 |
file_extension = os.path.splitext(uploaded_file.name)[-1][1:].lower()
|
144 |
if file_extension in loaders:
|
145 |
-
loader_cls = loaders[file_extension]
|
146 |
loader = loader_cls.load(uploaded_file.name) # Pass the file path to the loader constructor
|
147 |
# content = uploaded_file.read() # Read the file content
|
148 |
-
|
149 |
|
150 |
# all_loaders = [pdf_loader, markdown_loader, text_loader, csv_loader, python_loader, epub_loader, html_loader, ppt_loader, pptx_loader, doc_loader, docx_loader, odt_loader, notebook_loader]
|
151 |
|
|
|
135 |
# docx_loader = DirectoryLoader("data/", glob="**/*.docx", loader_cls=UnstructuredWordDocumentLoader)
|
136 |
# odt_loader = DirectoryLoader("data/", glob="**/*.odt", loader_cls=UnstructuredODTLoader)
|
137 |
# notebook_loader = DirectoryLoader("data/", glob="**/*.ipynb", loader_cls=NotebookLoader)
|
138 |
+
# FILE_LOADER_MAPPING = {
|
139 |
+
# ".csv": (CSVLoader, {"encoding": "utf-8"}),
|
140 |
+
# ".doc": (UnstructuredWordDocumentLoader, {}),
|
141 |
+
# ".docx": (UnstructuredWordDocumentLoader, {}),
|
142 |
+
# ".enex": (EverNoteLoader, {}),
|
143 |
+
# ".epub": (UnstructuredEPubLoader, {}),
|
144 |
+
# ".html": (UnstructuredHTMLLoader, {}),
|
145 |
+
# ".md": (UnstructuredMarkdownLoader, {}),
|
146 |
+
# ".odt": (UnstructuredODTLoader, {}),
|
147 |
+
# ".pdf": (PyPDFLoader, {}),
|
148 |
+
# ".ppt": (UnstructuredPowerPointLoader, {}),
|
149 |
+
# ".pptx": (UnstructuredPowerPointLoader, {}),
|
150 |
+
# ".txt": (TextLoader, {"encoding": "utf8"}),
|
151 |
+
# ".ipynb": (NotebookLoader, {}),
|
152 |
+
# ".py": (PythonLoader, {}),
|
153 |
+
# # Add more mappings for other file extensions and loaders as needed
|
154 |
+
# }
|
155 |
|
156 |
# Load documents from uploaded files using the appropriate loaders
|
157 |
loaded_documents = []
|
|
|
159 |
# file_extension = os.path.splitext(uploaded_file.name)[-1].lower()[1:]
|
160 |
file_extension = os.path.splitext(uploaded_file.name)[-1][1:].lower()
|
161 |
if file_extension in loaders:
|
162 |
+
loader_cls = loaders[file_extension](uploaded_file)
|
163 |
loader = loader_cls.load(uploaded_file.name) # Pass the file path to the loader constructor
|
164 |
# content = uploaded_file.read() # Read the file content
|
165 |
+
loaded_documents.extend(loader.load(content))
|
166 |
|
167 |
# all_loaders = [pdf_loader, markdown_loader, text_loader, csv_loader, python_loader, epub_loader, html_loader, ppt_loader, pptx_loader, doc_loader, docx_loader, odt_loader, notebook_loader]
|
168 |
|