import chainlit as cl
from langchain_community.llms import Ollama
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model for queries; it must match the model the store was built with
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load the existing Chroma vector store
persist_directory = 'mydb'
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
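
# The 'mydb' store is assumed to have been built beforehand. A minimal
# ingestion sketch, run once in a separate script (the loader choice and the
# source path are hypothetical):
#
#     from langchain_community.document_loaders import TextLoader
#     from langchain.text_splitter import RecursiveCharacterTextSplitter
#
#     docs = TextLoader("docs/notes.txt").load()  # hypothetical source file
#     chunks = RecursiveCharacterTextSplitter(
#         chunk_size=500, chunk_overlap=50
#     ).split_documents(docs)
#     Chroma.from_documents(chunks, embeddings, persist_directory="mydb")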

# Initialize Ollama LLM
llm = Ollama(
    model="llama3.2",  # You can change this to any model you have pulled in Ollama
    temperature=0
)
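
# Note: in newer LangChain releases this community wrapper is deprecated in
# favour of the langchain-ollama package (from langchain_ollama import
# OllamaLLM); the community import is kept here because it still works.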

# Create the RAG prompt template
template = """Answer the question based only on the following context:

{context}

Question: {question}

Answer the question in a clear and concise way. If you cannot find the answer in the context, just say "I don't have enough information to answer this question."

Make sure to:
1. Only use information from the provided context
2. Be concise and direct
3. If you're unsure, acknowledge it
"""

prompt = ChatPromptTemplate.from_template(template)
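# from_template infers the input variables ({context} and {question}) from the
# braces in the template string; the dict at the head of the chain below
# supplies them at run time.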

@cl.on_chat_start
async def start():
    # Send initial message
    await cl.Message(
        content="Hi! I'm ready to answer your questions based on the stored documents. What would you like to know?"
    ).send()

@cl.on_message
async def main(message: cl.Message):
    # Send an empty placeholder message that we will update with the answer
    msg = cl.Message(content="")
    await msg.send()

    # Show a progress step while retrieval and generation run
    async with cl.Step(name="Searching documents..."):
        try:
            # Search the vector store
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
            
            # Create the RAG chain
            rag_chain = (
                {"context": retriever, "question": RunnablePassthrough()}
                | prompt 
                | llm 
                | StrOutputParser()
            )
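            # The dict step fans the question out: the retriever fills {context}
            # with the top-k documents while RunnablePassthrough forwards the
            # raw question into {question}.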
            
            # Execute the chain; LCEL runnables are not plain callables, so use
            # ainvoke instead of wrapping the chain in cl.make_async
            response = await rag_chain.ainvoke(message.content)

            # Fill the placeholder message with the answer
            msg.content = response
            await msg.update()

            # Show the source documents (this re-runs the similarity search;
            # fine for a demo, though the chain above already retrieved them)
            docs = await retriever.ainvoke(message.content)
            elements = []
            for i, doc in enumerate(docs):
                source_name = f"Source {i+1}"
                elements.append(
                    cl.Text(name=source_name, content=doc.page_content, display="inline")
                )
            
            if elements:
                msg.elements = elements
                await msg.update()

        except Exception as e:
            msg.content = f"An error occurred: {e}"
            await msg.update()

# Chainlit apps are launched from the CLI rather than a __main__ guard:
#   chainlit run <this_file>.py