Spaces:

AI-RESEARCHER-2024
/

CHAINLIT-RAG

Runtime error

App Files Files Community

CHAINLIT-RAG / app.py

AI-RESEARCHER-2024

Update app.py

fa23d20 verified 7 days ago

raw

history blame

3.96 kB

	import os
	from typing import Any, List, Mapping, Optional
	import chainlit as cl
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain.prompts import ChatPromptTemplate
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.runnables import RunnablePassthrough
	from langchain_community.vectorstores import Chroma
	from langchain.callbacks.manager import CallbackManagerForLLMRun
	from langchain.llms.base import LLM
	from llama_cpp import Llama

	class LlamaCppLLM(LLM):
	    """LangChain ``LLM`` adapter around a ``llama_cpp.Llama`` instance.

	    Each prompt is sent to the wrapped model as a single user message via
	    ``create_chat_completion`` and the text of the first returned choice is
	    handed back to LangChain.
	    """

	    # The wrapped llama.cpp model. Annotated as ``Any`` so the base class
	    # does not attempt to validate the object's type.
	    model: Any

	    def __init__(self, model: Llama):
	        """Create the wrapper.

	        Args:
	            model: An already constructed ``llama_cpp.Llama`` instance.
	        """
	        # Pass the field through the base initializer rather than assigning
	        # it afterwards: ``model`` is a declared field on the class, and the
	        # base class (a pydantic model in LangChain) validates declared
	        # fields during ``__init__`` -- calling it with no arguments can
	        # fail with a "field required" error before the assignment runs.
	        super().__init__(model=model)

	    @property
	    def _llm_type(self) -> str:
	        """Return the identifier LangChain uses for this LLM type."""
	        return "llama.cpp"

	    def _call(
	        self,
	        prompt: str,
	        stop: Optional[List[str]] = None,
	        run_manager: Optional[CallbackManagerForLLMRun] = None,
	        **kwargs: Any,
	    ) -> str:
	        """Run one chat completion for *prompt* and return the reply text.

	        Args:
	            prompt: Fully rendered prompt, sent as the only user message.
	            stop: Optional stop sequences; forwarded to llama.cpp when given.
	            run_manager: LangChain callback manager (not used here).
	            **kwargs: Extra keyword arguments forwarded unchanged to
	                ``create_chat_completion``.

	        Returns:
	            The ``content`` of the first choice in the completion response.
	        """
	        completion_kwargs = dict(kwargs)
	        # Previously ``stop`` was accepted but never forwarded, so stop
	        # sequences requested by a caller were silently ignored.
	        if stop:
	            completion_kwargs["stop"] = stop

	        response = self.model.create_chat_completion(
	            messages=[{"role": "user", "content": prompt}],
	            **completion_kwargs,
	        )
	        return response["choices"][0]["message"]["content"]

	# Embedding model used by the vector store: MiniLM sentence-transformer on
	# CPU, with ``normalize_embeddings`` switched on.
	embeddings = HuggingFaceEmbeddings(
	    model_name="sentence-transformers/all-MiniLM-L6-v2",
	    model_kwargs=dict(device="cpu"),
	    encode_kwargs=dict(normalize_embeddings=True),
	)

	# Open the Chroma store kept in the "mydb" directory next to this file.
	persist_directory = os.path.join(
	    os.path.dirname(__file__),
	    "mydb",
	)
	vectorstore = Chroma(
	    embedding_function=embeddings,
	    persist_directory=persist_directory,
	)

	# llama.cpp model (GGUF build of Llama 3.1 8B Instruct).
	llama_model = Llama.from_pretrained(
	    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
	    filename="Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf",
	    n_ctx=2048,  # Context window
	    n_threads=4,  # Number of CPU threads to use
	    n_gpu_layers=0,  # Set to higher number if using GPU
	)

	# LangChain-compatible wrapper around the llama.cpp model.
	llm = LlamaCppLLM(model=llama_model)

	# RAG prompt: filled with the retrieved {context} and the user's {question}.
	template = """You are a helpful AI assistant. Using only the following context, answer the user's question.
	If you cannot find the answer in the context, say "I don't have enough information to answer this question."

	Context:
	{context}

	Question: {question}

	Answer: Let me help you with that."""

	prompt = ChatPromptTemplate.from_template(template)

	@cl.on_chat_start
	async def start():
	    """Greet the user when a new chat session starts."""
	    greeting = cl.Message(
	        content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
	    )
	    await greeting.send()

	@cl.on_message
	async def main(message: cl.Message):
	    """Answer an incoming chat message using retrieval-augmented generation.

	    Sends an empty placeholder message, retrieves the three most similar
	    documents from ``vectorstore``, runs ``prompt | llm`` over their text and
	    the user's question, then updates the placeholder with the answer and the
	    retrieved passages as inline "Source N" text elements. Any exception is
	    reported to the user in the same placeholder message.

	    Args:
	        message: The incoming Chainlit message; its ``content`` is used as the
	            question.
	    """
	    # Placeholder shown while the answer is being produced.
	    msg = cl.Message(content="")
	    await msg.send()

	    async with cl.Step(name="Searching documents..."):
	        try:
	            question = message.content
	            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

	            # Retrieve once and reuse the result for both the prompt context
	            # and the source listing. The call is synchronous, so it is run
	            # through ``make_async`` to keep the event loop responsive.
	            docs = await cl.make_async(retriever.invoke)(question)

	            # Feed the passages' text to the prompt rather than the repr of
	            # a list of Document objects.
	            context = "\n\n".join(doc.page_content for doc in docs)

	            rag_chain = prompt | llm | StrOutputParser()

	            # A Runnable chain is executed through ``invoke``; the chain
	            # object itself is not callable, so ``make_async`` must wrap the
	            # bound method.
	            response = await cl.make_async(rag_chain.invoke)(
	                {"context": context, "question": question}
	            )

	            elements = [
	                cl.Text(
	                    name=f"Source {i}",
	                    content=doc.page_content,
	                    display="inline",
	                )
	                for i, doc in enumerate(docs, start=1)
	            ]

	            # Set the fields on the message and call ``update()`` without
	            # arguments; recent Chainlit releases no longer accept keyword
	            # arguments on ``update`` (see the Chainlit Message docs).
	            msg.content = response
	            if elements:
	                msg.elements = elements
	            await msg.update()

	        except Exception as e:
	            # Top-level handler boundary: surface the failure to the user
	            # instead of leaving the empty placeholder on screen.
	            msg.content = f"An error occurred: {str(e)}"
	            await msg.update()

	if __name__ == "__main__":
	    # Allows ``python app.py`` in addition to ``chainlit run app.py``.
	    # The ``chainlit`` package does not appear to expose a ``run()``
	    # function; ``run_chainlit`` from ``chainlit.cli`` is the programmatic
	    # entry point -- confirm against the installed Chainlit version.
	    # NOTE(review): Chainlit presumably loads this file again as the app
	    # module, so the module-level model setup may execute twice on this path.
	    from chainlit.cli import run_chainlit

	    run_chainlit(__file__)