import os

import chainlit as cl
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_community.vectorstores import Chroma
from llama_cpp import Llama

# Initialize the embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)
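# Note: normalize_embeddings=True makes the similarity scores behave like cosine
# similarity; this should match whatever settings were used when the index was built.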

# Load the existing Chroma vector store
persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
vectorstore = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings
)
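# Note: 'mydb' is assumed to already hold a Chroma collection indexed with the same
# embedding model; an empty or mismatched index makes retrieval return nothing useful.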

# Initialize the Llama model using from_pretrained
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
    n_ctx=2048,  # llama-cpp-python's default 512-token window is easily overflowed by three retrieved chunks
)
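# Note: from_pretrained downloads the GGUF file from the Hugging Face Hub on first
# run and reuses the local cache afterwards, so the first startup needs network access.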

# Create the RAG prompt template
template = """You are a helpful AI assistant. Using only the following context, answer the user's question.
If you cannot find the answer in the context, say "I don't have enough information to answer this question."

Context:
{context}

Question: {question}

Answer: Let me help you with that."""

prompt = ChatPromptTemplate.from_template(template)
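# The template restricts the model to the retrieved context, and the trailing
# "Answer:" line is meant to prime it to respond directly.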

@cl.on_chat_start
async def start():
    await cl.Message(
        content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
    ).send()

@cl.on_message
async def main(message: cl.Message):
    # Send an empty placeholder message that is filled in once the answer is ready
    msg = cl.Message(content="")
    await msg.send()
    async with cl.Step(name="Searching documents..."):
        try:
            # Retrieve the three most similar chunks from the vector store
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
            docs = retriever.invoke(message.content)
            context = "\n\n".join([doc.page_content for doc in docs])
            # Format the prompt with the retrieved context and the user's question
            final_prompt = prompt.format(context=context, question=message.content)
            # Generate the response with the Llama model
            response = llm.create_chat_completion(
                messages=[
                    {
                        "role": "user",
                        "content": final_prompt
                    }
                ]
            )
            assistant_reply = response['choices'][0]['message']['content']
            # Fill the placeholder message with the answer and attach the sources
            msg.content = assistant_reply
            elements = []
            for i, doc in enumerate(docs):
                source_name = f"Source {i+1}"
                elements.append(
                    cl.Text(name=source_name, content=doc.page_content, display="inline")
                )
            msg.elements = elements
            await msg.update()
        except Exception as e:
            # Surface the full traceback in the chat to make debugging easier
            import traceback
            error_msg = f"An error occurred: {str(e)}\n{traceback.format_exc()}"
            msg.content = error_msg
            await msg.update()

# Chainlit has no cl.run() entry point; launch the app from the CLI instead:
#   chainlit run app.py