aie4-final / helper_functions.py
danicafisher's picture
Update helper_functions.py
d056c3f verified
raw
history blame
1.12 kB
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, WebBaseLoader
from langchain_community.vectorstores import Qdrant
import os
def process_file(file_or_url):
    """Load a web page or an uploaded file into a list of documents.

    Args:
        file_or_url: Either an ``http://`` / ``https://`` URL string, or an
            uploaded-file object exposing ``path`` and ``content``
            attributes (presumably an upload object from the UI layer;
            confirm against the caller).

    Returns:
        A new list holding the documents produced by the loader that
        matches the input: ``WebBaseLoader`` for URLs, ``PyMuPDFLoader``
        for paths ending in ``.pdf``, ``TextLoader`` for everything else.
    """
    # Create the result list up front so every branch can append to it.
    # It used to be created only after the URL branch had already used it,
    # which raised UnboundLocalError for any URL input.
    documents = []

    if isinstance(file_or_url, str) and file_or_url.startswith(('http://', 'https://')):
        # Handle URL
        documents.extend(WebBaseLoader(file_or_url).load())
        # A URL string has no ``path`` / ``content`` attributes, so it must
        # not fall through to the uploaded-file handling below.
        return documents

    # Save the upload to disk because the file loaders read from a path.
    # NOTE(review): this copy is never removed afterwards -- confirm whether
    # the caller is expected to clean it up.
    temp_file = "./" + file_or_url.path
    with open(temp_file, "wb") as file:
        file.write(file_or_url.content)

    # Only the loader class differs between PDF and plain-text inputs.
    if file_or_url.path.endswith(".pdf"):
        loader = PyMuPDFLoader(temp_file)
    else:
        loader = TextLoader(temp_file)
    documents.extend(loader.load())
    return documents
def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
    """Index ``documents`` into a Qdrant collection.

    Args:
        documents: Documents handed to ``Qdrant.from_documents``.
        embeddings: Embedding object handed to ``Qdrant.from_documents``.
        qdrant_client: Object whose ``url`` attribute supplies the Qdrant
            endpoint; nothing else is read from it here.
        collection_name: Name of the target collection.

    Returns:
        None. The vector store built by ``Qdrant.from_documents`` is
        discarded.
    """
    build_store = Qdrant.from_documents
    # Connection settings are gathered separately from the payload arguments.
    connection_options = {
        "url": qdrant_client.url,
        "prefer_grpc": True,
        "collection_name": collection_name,
    }
    build_store(documents, embeddings, **connection_options)