File size: 1,122 Bytes
952909f
2624a11
7bc489f
 
952909f
 
 
 
 
 
7bc489f
d056c3f
7bc489f
952909f
 
2624a11
 
952909f
7bc489f
2624a11
 
 
e5beda5
2624a11
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, WebBaseLoader
from langchain_community.vectorstores import Qdrant
import os

def process_file(file_or_url):
    """Load a web page or an uploaded file into a list of documents.

    Args:
        file_or_url: Either an ``http://``/``https://`` URL string, or an
            uploaded-file object exposing ``path`` and ``content`` attributes.
            ``content`` is written out in binary mode, so it is presumably
            ``bytes`` -- confirm against the caller.

    Returns:
        A list with the documents produced by the matching loader:
        ``WebBaseLoader`` for URLs, ``PyMuPDFLoader`` when the path ends in
        ``.pdf`` (case-insensitive) and ``TextLoader`` for everything else.

    Raises:
        AttributeError: If the argument is neither a URL string nor an object
            with ``path`` and ``content`` attributes.
    """
    if isinstance(file_or_url, str) and file_or_url.startswith(('http://', 'https://')):
        # A URL string has no ``path``/``content`` to persist, so return right
        # away instead of falling through to the uploaded-file handling below.
        return list(WebBaseLoader(file_or_url).load())

    # Write the upload to a path relative to the working directory so the
    # path-based loaders can read it.
    # NOTE(review): this copy is never removed; it is not deleted here because
    # for a relative ``path`` it is the very same file as the upload itself.
    temp_file = "./" + file_or_url.path
    with open(temp_file, "wb") as file:
        file.write(file_or_url.content)

    # Compare the extension case-insensitively so "REPORT.PDF" is not handed
    # to the plain-text loader.
    if file_or_url.path.lower().endswith(".pdf"):
        loader = PyMuPDFLoader(temp_file)
    else:
        loader = TextLoader(temp_file)
    return list(loader.load())


def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
    """Embed *documents* and store them in a Qdrant collection.

    Args:
        documents: Documents to index. An empty sequence is a no-op.
        embeddings: Embedding model handed through to
            ``Qdrant.from_documents``.
        qdrant_client: Object whose ``url`` attribute is used as the address
            of the Qdrant server.
        collection_name: Name of the collection the documents are stored in.

    Returns:
        None. The vector store created by ``Qdrant.from_documents`` is not
        kept.
    """
    if not documents:
        # Nothing to index. NOTE(review): ``from_documents`` appears to embed
        # the first text to size the collection, so an empty batch would fail
        # there -- skip the round trip instead.
        return

    Qdrant.from_documents(
        documents,
        embeddings,
        url=qdrant_client.url,
        prefer_grpc=True,
        collection_name=collection_name,
    )