import os
import chainlit as cl
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from llama_cpp import Llama

# Initialize the embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

# Load the existing Chroma vector store
persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
vectorstore = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings
)

# Initialize the Llama model using from_pretrained.
# n_ctx raises the context window above the llama-cpp default (512),
# which is too small to hold the retrieved chunks plus the question.
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
    n_ctx=4096,
)

# Create the RAG prompt template
template = """You are a helpful AI assistant. Using only the following context, answer the user's question.
If you cannot find the answer in the context, say "I don't have enough information to answer this question."

Context: {context}

Question: {question}

Answer:"""

# PromptTemplate (rather than ChatPromptTemplate) so .format() returns the
# raw string we hand to llama-cpp, without a "Human:" role prefix.
prompt = PromptTemplate.from_template(template)


@cl.on_chat_start
async def start():
    await cl.Message(
        content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
    ).send()


@cl.on_message
async def main(message: cl.Message):
    # Placeholder message that is filled in once the answer is ready
    msg = cl.Message(content="")
    await msg.send()

    async with cl.Step(name="Searching documents"):
        try:
            # Retrieve the three most similar chunks from the vector store
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
            docs = retriever.invoke(message.content)
            context = "\n\n".join(doc.page_content for doc in docs)

            # Format the prompt
            final_prompt = prompt.format(context=context, question=message.content)

            # Generate a response using the Llama model
            response = llm.create_chat_completion(
                messages=[{"role": "user", "content": final_prompt}]
            )
            assistant_reply = response['choices'][0]['message']['content']

            # Attach the source chunks so the user can inspect them
            msg.elements = [
                cl.Text(name=f"Source {i + 1}", content=doc.page_content, display="inline")
                for i, doc in enumerate(docs)
            ]

            # Replace the placeholder with the answer; Message.update() takes
            # no arguments, so set the attributes first, then call update()
            msg.content = assistant_reply
            await msg.update()

        except Exception as e:
            import traceback
            msg.content = f"An error occurred: {e}\n{traceback.format_exc()}"
            await msg.update()


if __name__ == '__main__':
    # Chainlit apps are normally launched with `chainlit run app.py`;
    # run_chainlit makes plain `python app.py` work as well.
    from chainlit.cli import run_chainlit
    run_chainlit(__file__)
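
# ---------------------------------------------------------------------------
# The app above assumes a Chroma collection already exists in ./mydb, built
# with the same all-MiniLM-L6-v2 embeddings. Below is a minimal one-off
# ingestion sketch; the source path and splitter settings are illustrative
# assumptions, not part of this app, and the helper is never called here.
# ---------------------------------------------------------------------------
def build_vectorstore_sketch():
    """Hypothetical helper: run once, separately, to populate ./mydb."""
    from langchain_community.document_loaders import TextLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    # Load a plain-text document (hypothetical path)
    raw_docs = TextLoader("docs/notes.txt").load()

    # Split into overlapping chunks sized for the k=3 retriever above
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50
    ).split_documents(raw_docs)

    # Embed and persist next to the app so the retriever can find it
    Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)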