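"""Chainlit RAG chat app: answers questions over a persisted Chroma vector store,
using a quantized Llama 3.1 8B Instruct model served by llama.cpp and wrapped as a
LangChain LLM."""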
import os
from typing import Any, List, Optional

import chainlit as cl
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import Chroma
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from llama_cpp import Llama
class LlamaCppLLM(LLM):
    """Custom LangChain wrapper around a llama.cpp model."""

    model: Any

    def __init__(self, model: Llama, **kwargs: Any):
        # Pass the model through the pydantic constructor so field validation runs
        super().__init__(model=model, **kwargs)

    @property
    def _llm_type(self) -> str:
        return "llama.cpp"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # llama.cpp applies the model's chat template; forward any stop sequences
        response = self.model.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            stop=stop,
            **kwargs,
        )
        return response["choices"][0]["message"]["content"]
# Initialize the embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)
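# With normalize_embeddings=True the vectors are unit length, so cosine
# similarity and dot-product ranking agree at query time.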
# Load the existing Chroma vector store
persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
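# The store must be queried with the same embedding model that built it;
# mismatched embeddings make the similarity search meaningless.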
# Initialize the Llama model (downloads the GGUF file from the Hugging Face Hub on first use)
llama_model = Llama.from_pretrained(
    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    filename="Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf",
    n_ctx=2048,       # Context window
    n_threads=4,      # Number of CPU threads to use
    n_gpu_layers=0    # Set to a higher number to offload layers to a GPU
)
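# IQ2_M is an aggressive ~2-bit quantization: small enough to run on CPU with a
# few GB of RAM, at a noticeable cost in answer quality versus higher-bit builds.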
# Create the LangChain wrapper
llm = LlamaCppLLM(model=llama_model)
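# Illustrative check (assumes the model downloaded above): LangChain LLMs expose
# .invoke(), so the wrapper can be exercised outside the chain, e.g.
#   print(llm.invoke("Say hello in one sentence."))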
# Create the RAG prompt template | |
template = """You are a helpful AI assistant. Using only the following context, answer the user's question. | |
If you cannot find the answer in the context, say "I don't have enough information to answer this question." | |
Context: | |
{context} | |
Question: {question} | |
Answer: Let me help you with that.""" | |
prompt = ChatPromptTemplate.from_template(template) | |
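# Small helper (not in the original snippet): join the retrieved documents into
# plain text. Without it, the prompt's {context} slot would receive the list's
# repr ("[Document(page_content=...), ...]") instead of the document text.
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)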
@cl.on_chat_start
async def start():
    # Send the initial greeting
    await cl.Message(
        content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
    ).send()
@cl.on_message
async def main(message: cl.Message):
    # Create a placeholder message for the answer
    msg = cl.Message(content="")
    await msg.send()

    # Do the retrieval and generation inside a collapsible step
    async with cl.Step(name="Searching documents..."):
        try:
            # Search the vector store for the 3 most similar chunks
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

            # Create the RAG chain: retrieve -> format -> prompt -> generate -> parse
            rag_chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            # Run the synchronous chain in a thread so the event loop isn't blocked
            response = await cl.make_async(rag_chain.invoke)(message.content)

            # Replace the placeholder content with the response
            msg.content = response
            await msg.update()

            # Show the source documents
            docs = retriever.get_relevant_documents(message.content)
            elements = []
            for i, doc in enumerate(docs):
                source_name = f"Source {i+1}"
                elements.append(
                    cl.Text(name=source_name, content=doc.page_content, display="inline")
                )
            if elements:
                msg.elements = elements
                await msg.update()
        except Exception as e:
            msg.content = f"An error occurred: {str(e)}"
            await msg.update()
# Chainlit apps are launched from the CLI rather than a __main__ block:
#   chainlit run <this_file>.py