"""Chainlit RAG app: answers questions about AI-governance PDFs using
LangChain, OpenAI, and an in-memory Qdrant vector store."""

import os
from operator import itemgetter

import chainlit as cl
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_qdrant import QdrantVectorStore
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Load every PDF in the data/ directory
# (e.g. data/NIST.AI.600-1.pdf and data/Blueprint-for-an-AI-Bill-of-Rights.pdf)
documents = []
directory = "data/"

for filename in os.listdir(directory):
    if filename.endswith(".pdf"):  # Only process PDF files
        file_path = os.path.join(directory, filename)
        loader = PyMuPDFLoader(file_path)
        documents.extend(loader.load())

# Split the documents into overlapping chunks for retrieval
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=40,
    length_function=len,
    is_separator_regex=False,
)
rag_documents = text_splitter.split_documents(documents)


@cl.on_chat_start
async def start_chat():
    LOCATION = ":memory:"
    COLLECTION_NAME = "Implications of AI"
    VECTOR_SIZE = 1536  # Dimensionality of OpenAI's default embedding model

    embeddings = OpenAIEmbeddings()
    qdrant_client = QdrantClient(location=LOCATION)

    # Create the collection
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
    )

    # Create the vector store and index the document chunks
    vectorstore = QdrantVectorStore(
        client=qdrant_client,
        collection_name=COLLECTION_NAME,
        embedding=embeddings,
    )
    vectorstore.add_documents(rag_documents)
    retriever = vectorstore.as_retriever()

    template = """Use the provided context to answer the user's query.

You may not answer the user's query unless there is specific context in the following text.

If you do not know the answer, or cannot answer, please respond with "I don't know".

Question:
{question}

Context:
{context}

Answer:
"""
    prompt = ChatPromptTemplate.from_template(template)
    base_llm = ChatOpenAI(model="gpt-4", temperature=0)

    # Retrieve context for the question, pass both to the prompt/LLM, and
    # return the LLM response alongside the retrieved context.
    retrieval_augmented_qa_chain = (
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
        }
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | base_llm, "context": itemgetter("context")}
    )

    cl.user_session.set("chain", retrieval_augmented_qa_chain)


@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")

    msg = cl.Message(content="")

    # The chain expects a dict keyed by "question"; stream tokens from the
    # "response" branch as they arrive (the "context" branch is ignored here).
    async for chunk in chain.astream({"question": message.content}):
        if "response" in chunk:
            await msg.stream_token(chunk["response"].content)

    await msg.send()
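# A minimal sketch of how to launch this app, assuming the file is saved as
# app.py (a hypothetical filename) and that OPENAI_API_KEY is set in the
# environment for the OpenAI embeddings and chat model:
#
#   export OPENAI_API_KEY="sk-..."
#   chainlit run app.py -w
#
# Chainlit serves the chat UI locally; the -w flag reloads the app whenever
# the source file changes.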