import os

import chainlit as cl
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_community.vectorstores import Chroma
from llama_cpp import Llama

# Initialize the embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)
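# Note: normalize_embeddings=True makes the similarity scores behave like cosine
# similarity; this should match whatever settings were used when the index was built.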

# Load the existing Chroma vector store
persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
vectorstore = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings
)
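# Note: 'mydb' is assumed to already hold a Chroma collection indexed with the same
# embedding model; an empty or mismatched index makes retrieval return nothing useful.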

# Initialize the Llama model using from_pretrained
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
    n_ctx=2048,  # llama-cpp-python's default 512-token window is easily overflowed by three retrieved chunks
)
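# Note: from_pretrained downloads the GGUF file from the Hugging Face Hub on first
# run and reuses the local cache afterwards, so the first startup needs network access.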

# Create the RAG prompt template
template = """You are a helpful AI assistant. Using only the following context, answer the user's question.
If you cannot find the answer in the context, say "I don't have enough information to answer this question."

Context:
{context}

Question: {question}

Answer: Let me help you with that."""

prompt = ChatPromptTemplate.from_template(template)
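# The template restricts the model to the retrieved context, and the trailing
# "Answer:" line is meant to prime it to respond directly.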

@cl.on_chat_start
async def start():
    await cl.Message(
        content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
    ).send()

@cl.on_message
async def main(message: cl.Message):
    # Send an empty placeholder message that is filled in once the answer is ready
    msg = cl.Message(content="")
    await msg.send()
    async with cl.Step(name="Searching documents..."):
        try:
            # Retrieve the three most similar chunks from the vector store
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
            docs = retriever.invoke(message.content)
            context = "\n\n".join([doc.page_content for doc in docs])
            # Format the prompt with the retrieved context and the user's question
            final_prompt = prompt.format(context=context, question=message.content)
            # Generate the response with the Llama model
            response = llm.create_chat_completion(
                messages=[
                    {
                        "role": "user",
                        "content": final_prompt
                    }
                ]
            )
            assistant_reply = response['choices'][0]['message']['content']
            # Fill the placeholder message with the answer and attach the sources
            msg.content = assistant_reply
            elements = []
            for i, doc in enumerate(docs):
                source_name = f"Source {i+1}"
                elements.append(
                    cl.Text(name=source_name, content=doc.page_content, display="inline")
                )
            msg.elements = elements
            await msg.update()
        except Exception as e:
            # Surface the full traceback in the chat to make debugging easier
            import traceback
            error_msg = f"An error occurred: {str(e)}\n{traceback.format_exc()}"
            msg.content = error_msg
            await msg.update()

# Chainlit has no cl.run() entry point; launch the app from the CLI instead:
#   chainlit run app.py