# langchat/app.py — uploaded by Tanmay09516 (commit dd87c4b, "Upload 14 files")
# app.py
import gradio as gr
from embeddings import init_embeddings
from vectorstore import load_all_vector_stores
from retriever import create_combined_retriever
from chain import init_conversational_chain
from langchain_groq import ChatGroq # Custom LLM class
from dotenv import load_dotenv
import os
import sys
# Disable parallelism warnings from tokenizers; must be set before any
# tokenizer is loaded (i.e. before init_embeddings runs below).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
def init_llm():
    """
    Build and return the Groq-backed chat model.

    Environment variables (e.g. the Groq API key) are loaded from a
    .env file first so ChatGroq can pick them up on construction.
    """
    load_dotenv()
    return ChatGroq()
def setup():
    """
    Assemble the conversational QA chain.

    Pipeline: embeddings -> persisted vector stores -> combined
    retriever -> LLM -> conversational chain.

    Exits the process with status 1 when the 'vector_stores' directory
    is missing or empty (the stores must be built beforehand).
    """
    embeddings = init_embeddings()
    store_dir = "vector_stores"
    # Refuse to start without persisted vector stores.
    if not (os.path.exists(store_dir) and os.listdir(store_dir)):
        print("Vector stores not found. Please run 'build_vectorstore.py' first.")
        sys.exit(1)
    stores = load_all_vector_stores(embeddings)
    combined_retriever = create_combined_retriever(stores)
    return init_conversational_chain(init_llm(), combined_retriever)
# Build the QA chain once at module load so every request reuses it.
qa_chain = setup()
def format_source_doc(doc):
    """
    Format a retrieved document for display in the sources panel.

    Args:
        doc: Document-like object exposing `page_content` (str) and
            `metadata` (dict, optionally containing a 'source' key).

    Returns:
        dict with:
            preview: first 150 characters of the content, with "..."
                appended only when the content was actually truncated
                (the original always appended it, mislabeling short docs).
            full_content: the untouched page content.
            source: metadata['source'], or 'Unknown' when absent.
    """
    content = doc.page_content
    preview = content[:150] + "..." if len(content) > 150 else content
    return {
        "preview": preview,
        "full_content": content,
        "source": doc.metadata.get('source', 'Unknown'),
    }
def get_chat_history_tuples(history_messages):
    """
    Collapse role/content message dicts into (user, assistant) pairs.

    A user message with no following assistant reply yields
    (user, None); an assistant message with no preceding user message
    yields (None, assistant).

    Args:
        history_messages: List of {'role': ..., 'content': ...} dicts.

    Returns:
        List of (user_message, assistant_message) tuples.
    """
    pairs = []
    pending_user = None
    for entry in history_messages:
        role = entry['role']
        if role == 'user':
            # Flush a previous user turn that never got an answer.
            if pending_user is not None:
                pairs.append((pending_user, None))
            pending_user = entry['content']
        elif role == 'assistant':
            pairs.append((pending_user, entry['content']))
            pending_user = None
    # A trailing unanswered user message is still recorded.
    if pending_user is not None:
        pairs.append((pending_user, None))
    return pairs
def chatbot(message, history):
    """
    Answer a single question via the module-level QA chain.

    Args:
        message: The user's question.
        history: Chat history as role/content dicts, or None.

    Returns:
        Tuple of (assistant message dict with 'role'/'content' keys,
        list of formatted source-document dicts).
    """
    # None and [] both become an empty tuple history for the chain.
    chat_history = get_chat_history_tuples(history or [])
    result = qa_chain.invoke({
        "question": message,
        "chat_history": chat_history,
    })
    answer = {"role": "assistant", "content": result["answer"]}
    sources = [format_source_doc(doc) for doc in result["source_documents"]]
    return answer, sources
def show_popup(source_doc):
    """
    Build a Textbox update revealing a document's full text.

    NOTE(review): this module-level helper is rebound by the
    show_popup defined inside the Blocks context further down, so this
    version is effectively dead code.

    Args:
        source_doc: dict with 'source' and 'full_content' keys.

    Returns:
        A gr.update making the textbox visible with the formatted text.
    """
    text = "Source: {source}\n\n{full_content}".format(**source_doc)
    return gr.update(value=text, visible=True)
# Define the Gradio Blocks interface: chat transcript on the left,
# retrieved-source inspector on the right.
with gr.Blocks(css="""
.source-box { margin: 5px; padding: 10px; border: 1px solid #ddd; border-radius: 5px; }
.source-box:hover { background-color: #f5f5f5; cursor: pointer; }
""") as demo:
    gr.Markdown("# Lang-Chat Chatbot")
    with gr.Row():
        with gr.Column(scale=7):
            # Chat history component; type="messages" means the history
            # is a list of {"role": ..., "content": ...} dicts.
            chatbot_component = gr.Chatbot(
                label="Chat History",
                height=500,
                bubble_full_width=False,
                type="messages"
            )
            with gr.Row():
                # Input textbox for user messages
                msg = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask me anything about LangChain...",
                    scale=8
                )
                # Submit button
                submit = gr.Button("Send", scale=1)
        with gr.Column(scale=3):
            gr.Markdown("### Source Documents")
            # Dropdown to select source documents; choices are filled in
            # after each answer.
            source_dropdown = gr.Dropdown(
                label="Select a Source Document",
                interactive=True
            )
            # Textbox to display full content of the selected document;
            # hidden until a document is picked.
            popup = gr.Textbox(
                label="Document Details",
                interactive=False,
                visible=False,
                lines=10
            )
    # Hidden state storing the formatted source dicts for the latest answer.
    source_data_state = gr.State()
def process_message(message, history):
"""
Process the user's message, update chat history, and prepare source document options.
Args:
message: The user's message.
history: The current chat history.
Returns:
Updated chat history, updated source dropdown options, and updated source data state.
"""
if history is None:
history = []
answer, sources = chatbot(message, history)
# Append the new user message and assistant response to history
history.append({"role": "user", "content": message})
history.append(answer)
# Prepare options for the dropdown
source_options = []
for idx, source in enumerate(sources):
option_label = f"{idx+1}. {source['source']} - {source['preview'][:30]}..."
source_options.append(option_label)
# Store sources in state
source_data_state = sources
return history, gr.update(choices=source_options, value=None), source_data_state
# Define the submit action for both the textbox and the button
msg.submit(
process_message,
[msg, chatbot_component],
[chatbot_component, source_dropdown, source_data_state]
)
submit.click(
process_message,
[msg, chatbot_component],
[chatbot_component, source_dropdown, source_data_state]
)
def show_popup(selected_option, source_data_state):
"""
Display the full content of the selected source document in a popup.
Args:
selected_option: The selected option from the dropdown.
source_data_state: The list of source documents.
Returns:
An update object for the popup textbox.
"""
if selected_option is None:
return gr.update(visible=False)
sources = source_data_state
# Extract index from selected_option
idx = int(selected_option.split('.')[0]) - 1
source = sources[idx]
full_content = f"Source: {source['source']}\n\n{source['full_content']}"
return gr.update(value=full_content, visible=True)
# Define the change action for the dropdown
source_dropdown.change(show_popup, inputs=[source_dropdown, source_data_state], outputs=popup)
# Launch the Gradio interface
demo.launch()