captain-awesome committed on
Commit
f241b97
·
1 Parent(s): 523d9c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -69
app.py CHANGED
@@ -149,75 +149,6 @@ def create_vector_database(loaded_documents):
149
  and finally persists the embeddings into a Chroma vector database.
150
 
151
  """
152
- # Initialize loaders for different file types
153
- # loaders = {
154
- # "pdf": UnstructuredPDFLoader,
155
- # "md": UnstructuredMarkdownLoader,
156
- # "txt": TextLoader,
157
- # "csv": CSVLoader,
158
- # "py": PythonLoader,
159
- # "epub": UnstructuredEPubLoader,
160
- # "html": UnstructuredHTMLLoader,
161
- # "ppt": UnstructuredPowerPointLoader,
162
- # "pptx": UnstructuredPowerPointLoader,
163
- # "doc": UnstructuredWordDocumentLoader,
164
- # "docx": UnstructuredWordDocumentLoader,
165
- # "odt": UnstructuredODTLoader,
166
- # "ipynb": NotebookLoader
167
- # }
168
- # pdf_loader = DirectoryLoader("data/", glob="**/*.pdf", loader_cls=PyPDFLoader)
169
- # markdown_loader = DirectoryLoader("data/", glob="**/*.md", loader_cls=UnstructuredMarkdownLoader)
170
- # text_loader = DirectoryLoader("data/", glob="**/*.txt", loader_cls=TextLoader)
171
- # csv_loader = DirectoryLoader("data/", glob="**/*.csv", loader_cls=CSVLoader)
172
- # python_loader = DirectoryLoader("data/", glob="**/*.py", loader_cls=PythonLoader)
173
- # epub_loader = DirectoryLoader("data/", glob="**/*.epub", loader_cls=UnstructuredEPubLoader)
174
- # html_loader = DirectoryLoader("data/", glob="**/*.html", loader_cls=UnstructuredHTMLLoader)
175
- # ppt_loader = DirectoryLoader("data/", glob="**/*.ppt", loader_cls=UnstructuredPowerPointLoader)
176
- # pptx_loader = DirectoryLoader("data/", glob="**/*.pptx", loader_cls=UnstructuredPowerPointLoader)
177
- # doc_loader = DirectoryLoader("data/", glob="**/*.doc", loader_cls=UnstructuredWordDocumentLoader)
178
- # docx_loader = DirectoryLoader("data/", glob="**/*.docx", loader_cls=UnstructuredWordDocumentLoader)
179
- # odt_loader = DirectoryLoader("data/", glob="**/*.odt", loader_cls=UnstructuredODTLoader)
180
- # notebook_loader = DirectoryLoader("data/", glob="**/*.ipynb", loader_cls=NotebookLoader)
181
- # FILE_LOADER_MAPPING = {
182
- # ".csv": (CSVLoader, {"encoding": "utf-8"}),
183
- # ".doc": (UnstructuredWordDocumentLoader, {}),
184
- # ".docx": (UnstructuredWordDocumentLoader, {}),
185
- # ".enex": (EverNoteLoader, {}),
186
- # ".epub": (UnstructuredEPubLoader, {}),
187
- # ".html": (UnstructuredHTMLLoader, {}),
188
- # ".md": (UnstructuredMarkdownLoader, {}),
189
- # ".odt": (UnstructuredODTLoader, {}),
190
- # ".pdf": (PyPDFLoader, {}),
191
- # ".ppt": (UnstructuredPowerPointLoader, {}),
192
- # ".pptx": (UnstructuredPowerPointLoader, {}),
193
- # ".txt": (TextLoader, {"encoding": "utf8"}),
194
- # ".ipynb": (NotebookLoader, {}),
195
- # ".py": (PythonLoader, {}),
196
- # # Add more mappings for other file extensions and loaders as needed
197
- # }
198
-
199
- # Load documents from uploaded files using the appropriate loaders
200
- # loaded_documents = []
201
- # for uploaded_file in uploaded_files:
202
- # # file_extension = os.path.splitext(uploaded_file.name)[-1].lower()[1:]
203
- # file_extension = os.path.splitext(uploaded_file.name)[-1][1:].lower()
204
- # if file_extension in loaders:
205
- # # Read the content of the uploaded file
206
- # file_content = uploaded_file.read()
207
-
208
- # # Pass the content to the loader for processing
209
- # loader = loaders[file_extension](file_content)
210
- # loaded_documents.extend(loader.load())
211
- # loader = loaders[file_extension](uploaded_file)
212
- # # loader = loader_cls.load(uploaded_file.name) # Pass the file path to the loader constructor
213
- # # # content = uploaded_file.read() # Read the file content
214
- # loaded_documents.extend(loader.load())
215
-
216
- # all_loaders = [pdf_loader, markdown_loader, text_loader, csv_loader, python_loader, epub_loader, html_loader, ppt_loader, pptx_loader, doc_loader, docx_loader, odt_loader, notebook_loader]
217
-
218
- # Load documents from all loaders
219
- # for loader in all_loaders:
220
- # loaded_documents.extend(loader.load())
221
 
222
  # Split loaded documents into chunks
223
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=40)
 
149
  and finally persists the embeddings into a Chroma vector database.
150
 
151
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  # Split loaded documents into chunks
154
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=40)