Spaces:

danicafisher
/

implications-of-AI

Sleeping

App Files Files Community

implications-of-AI / app.py

danicafisher

Update app.py

63bab67 verified about 1 month ago

raw

history blame

3.25 kB

	import os
	from langchain_community.document_loaders import PyMuPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_openai import OpenAIEmbeddings, ChatOpenAI
	from langchain_qdrant import QdrantVectorStore
	from langchain.prompts import ChatPromptTemplate
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.runnables import RunnablePassthrough
	from qdrant_client import QdrantClient
	from qdrant_client.http.models import Distance, VectorParams
	from operator import itemgetter
	import chainlit as cl

	# # Load the documents
	# pdf_loader_NIST = PyMuPDFLoader("data/NIST.AI.600-1.pdf").load()
	# pdf_loader_Blueprint = PyMuPDFLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
	# documents = pdf_loader_NIST + pdf_loader_Blueprint

	# Collect the documents loaded from every PDF found in the data directory.
	directory = "data/"
	documents = []

	for filename in os.listdir(directory):
	    if not filename.endswith(".pdf"):
	        continue  # only PDF files are ingested
	    file_path = os.path.join(directory, filename)
	    loader = PyMuPDFLoader(file_path)
	    documents += loader.load()

	# Chunk the loaded documents: pieces of at most 500 units (measured with
	# len) that overlap by 40, using the splitter's literal (non-regex) separators.
	splitter_options = {
	    "chunk_size": 500,
	    "chunk_overlap": 40,
	    "length_function": len,
	    "is_separator_regex": False,
	}
	text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
	rag_documents = text_splitter.split_documents(documents)

	# Settings for the in-memory Qdrant collection that backs retrieval.
	LOCATION = ":memory:"
	COLLECTION_NAME = "Implications of AI"
	VECTOR_SIZE = 1536

	# Retriever shared by every chat session. It is populated on first use so
	# the corpus is embedded once per process instead of once per chat.
	_shared_retriever = None


	def _get_retriever():
	    """Return the process-wide retriever, building the vector store on first call.

	    The first call creates the Qdrant collection, embeds ``rag_documents``
	    into it and caches the resulting retriever; later calls return the
	    cached object without touching the embedding API again.
	    """
	    global _shared_retriever
	    if _shared_retriever is not None:
	        return _shared_retriever

	    embeddings = OpenAIEmbeddings()
	    qdrant_client = QdrantClient(location=LOCATION)

	    # Create the collection
	    qdrant_client.create_collection(
	        collection_name=COLLECTION_NAME,
	        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
	    )

	    # Create the vector store
	    vectorstore = QdrantVectorStore(
	        client=qdrant_client,
	        collection_name=COLLECTION_NAME,
	        embedding=embeddings,
	    )

	    # Load and add documents
	    # NOTE: this call is synchronous, so the very first chat start blocks
	    # until all chunks have been embedded and stored.
	    vectorstore.add_documents(rag_documents)

	    _shared_retriever = vectorstore.as_retriever()
	    return _shared_retriever


	@cl.on_chat_start
	async def start_chat():
	    """Build the retrieval-augmented QA chain and store it in the user session.

	    The chain takes a dict with a ``"question"`` key and produces a dict with
	    ``"response"`` (the LLM output) and ``"context"`` (the retrieved
	    documents). It is saved in the session under the key ``"chain"``.
	    """
	    retriever = _get_retriever()

	    template = """
	Use the provided context to answer the user's query.
	You may not answer the user's query unless there is specific context in the following text.
	If you do not know the answer, or cannot answer, please respond with "I don't know".
	Question:
	{question}
	Context:
	{context}
	Answer:
	"""

	    prompt = ChatPromptTemplate.from_template(template)
	    base_llm = ChatOpenAI(model_name="gpt-4", temperature=0)

	    retrieval_augmented_qa_chain = (
	        # Retrieve context for the question and carry the question along.
	        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
	        # Re-assigns "context" to itself; kept so the chain shape is unchanged.
	        | RunnablePassthrough.assign(context=itemgetter("context"))
	        # Produce the answer while also exposing the retrieved context.
	        | {"response": prompt | base_llm, "context": itemgetter("context")}
	    )

	    cl.user_session.set("chain", retrieval_augmented_qa_chain)


	@cl.on_message
	async def main(message):
	    """Answer an incoming user message with the session's chain, streaming the reply.

	    Args:
	        message: the incoming Chainlit message; its ``content`` is used as
	            the question passed to the chain.
	    """
	    chain = cl.user_session.get("chain")

	    msg = cl.Message(content="")

	    # The chain reads its input with itemgetter("question"), so it must be
	    # given a dict rather than the bare message text. astream() is the async
	    # streaming entry point; it yields partial dicts keyed by output name,
	    # of which only the "response" pieces carry answer text.
	    async for chunk in chain.astream({"question": message.content}):
	        response_piece = chunk.get("response")
	        if response_piece is not None and response_piece.content:
	            await msg.stream_token(response_piece.content)

	    await msg.send()