import os
import chainlit as cl
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from llama_cpp import Llama
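# Assumed dependencies (not pinned in the original): chainlit, langchain,
# langchain-community, chromadb, sentence-transformers, and llama-cpp-python;
# huggingface_hub is also needed so Llama.from_pretrained can download the GGUF file.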

# Initialize the embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)
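# normalize_embeddings=True yields unit-length vectors, so similarity search
# in the vector store behaves like cosine similarity.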

# Load the existing Chroma vector store
persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
vectorstore = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings
)
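# Note: this embedding model must be the same one used when the `mydb` store
# was built; querying with a different model returns meaningless nearest neighbours.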

# Initialize the Llama model using from_pretrained
# (downloads the GGUF file from the Hugging Face Hub via huggingface_hub)
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
    n_ctx=4096,  # assumption, not in the original: the llama-cpp default context is too small for three retrieved chunks
)

# Create the RAG prompt template
template = """You are a helpful AI assistant. Using only the following context, answer the user's question.
If you cannot find the answer in the context, say "I don't have enough information to answer this question."

Context:
{context}

Question: {question}

Answer: Let me help you with that."""

prompt = PromptTemplate.from_template(template)

@cl.on_chat_start
async def start():
    await cl.Message(
        content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
    ).send()

@cl.on_message
async def main(message: cl.Message):
    msg = cl.Message(content="")
    await msg.send()

    async with cl.Step(name="Searching documents..."):
        try:
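            # Retrieve the top-3 most similar chunks and join them into a single context string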
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
            docs = retriever.get_relevant_documents(message.content)
            context = "\n\n".join([doc.page_content for doc in docs])
            
            # Format the prompt
            final_prompt = prompt.format(context=context, question=message.content)
            
            # Generate response using the Llama model
            response = llm.create_chat_completion(
                messages=[
                    {
                        "role": "user",
                        "content": final_prompt
                    }
                ]
            )
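            # create_chat_completion returns an OpenAI-style dict; pull out the reply text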
            assistant_reply = response['choices'][0]['message']['content']
            
            # Update the placeholder message with the model's answer
            # (Chainlit's Message.update() takes no arguments; set attributes first)
            msg.content = assistant_reply
            await msg.update()

            # Show source documents as inline text elements
            elements = []
            for i, doc in enumerate(docs):
                source_name = f"Source {i+1}"
                elements.append(
                    cl.Text(name=source_name, content=doc.page_content, display="inline")
                )

            if elements:
                msg.elements = elements
                await msg.update()

        except Exception as e:
            import traceback
            error_msg = f"An error occurred: {str(e)}\n{traceback.format_exc()}"
            msg.content = error_msg
            await msg.update()

# Launch with the Chainlit CLI (e.g. `chainlit run app.py`);
# chainlit does not expose a `cl.run()` entry point.