captain-awesome committed on
Commit
b62d7f8
·
1 Parent(s): 29f8c44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -135,6 +135,23 @@ def create_vector_database(uploaded_files):
135
  # docx_loader = DirectoryLoader("data/", glob="**/*.docx", loader_cls=UnstructuredWordDocumentLoader)
136
  # odt_loader = DirectoryLoader("data/", glob="**/*.odt", loader_cls=UnstructuredODTLoader)
137
  # notebook_loader = DirectoryLoader("data/", glob="**/*.ipynb", loader_cls=NotebookLoader)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  # Load documents from uploaded files using the appropriate loaders
140
  loaded_documents = []
@@ -142,10 +159,10 @@ def create_vector_database(uploaded_files):
142
  # file_extension = os.path.splitext(uploaded_file.name)[-1].lower()[1:]
143
  file_extension = os.path.splitext(uploaded_file.name)[-1][1:].lower()
144
  if file_extension in loaders:
145
- loader_cls = loaders[file_extension]
146
  loader = loader_cls.load(uploaded_file.name) # Pass the file path to the loader constructor
147
  # content = uploaded_file.read() # Read the file content
148
- # loaded_documents.extend(loader.load(content))
149
 
150
  # all_loaders = [pdf_loader, markdown_loader, text_loader, csv_loader, python_loader, epub_loader, html_loader, ppt_loader, pptx_loader, doc_loader, docx_loader, odt_loader, notebook_loader]
151
 
 
135
  # docx_loader = DirectoryLoader("data/", glob="**/*.docx", loader_cls=UnstructuredWordDocumentLoader)
136
  # odt_loader = DirectoryLoader("data/", glob="**/*.odt", loader_cls=UnstructuredODTLoader)
137
  # notebook_loader = DirectoryLoader("data/", glob="**/*.ipynb", loader_cls=NotebookLoader)
138
+ # FILE_LOADER_MAPPING = {
139
+ # ".csv": (CSVLoader, {"encoding": "utf-8"}),
140
+ # ".doc": (UnstructuredWordDocumentLoader, {}),
141
+ # ".docx": (UnstructuredWordDocumentLoader, {}),
142
+ # ".enex": (EverNoteLoader, {}),
143
+ # ".epub": (UnstructuredEPubLoader, {}),
144
+ # ".html": (UnstructuredHTMLLoader, {}),
145
+ # ".md": (UnstructuredMarkdownLoader, {}),
146
+ # ".odt": (UnstructuredODTLoader, {}),
147
+ # ".pdf": (PyPDFLoader, {}),
148
+ # ".ppt": (UnstructuredPowerPointLoader, {}),
149
+ # ".pptx": (UnstructuredPowerPointLoader, {}),
150
+ # ".txt": (TextLoader, {"encoding": "utf8"}),
151
+ # ".ipynb": (NotebookLoader, {}),
152
+ # ".py": (PythonLoader, {}),
153
+ # # Add more mappings for other file extensions and loaders as needed
154
+ # }
155
 
156
  # Load documents from uploaded files using the appropriate loaders
157
  loaded_documents = []
 
159
  # file_extension = os.path.splitext(uploaded_file.name)[-1].lower()[1:]
160
  file_extension = os.path.splitext(uploaded_file.name)[-1][1:].lower()
161
  if file_extension in loaders:
162
+ loader_cls = loaders[file_extension](uploaded_file)
163
  loader = loader_cls.load(uploaded_file.name) # Pass the file path to the loader constructor
164
  # content = uploaded_file.read() # Read the file content
165
+ loaded_documents.extend(loader.load(content))
166
 
167
  # all_loaders = [pdf_loader, markdown_loader, text_loader, csv_loader, python_loader, epub_loader, html_loader, ppt_loader, pptx_loader, doc_loader, docx_loader, odt_loader, notebook_loader]
168