# app.py
"""Gradio chat interface backed by a conversational retrieval QA chain.

The module builds the QA chain at import time, defines the helpers that
translate between Gradio's message-style chat history and the chain's inputs,
then declares the Blocks UI and launches it.
"""

import os
import sys

import gradio as gr
from langchain_groq import ChatGroq  # Chat model class used as the LLM
from dotenv import load_dotenv

from embeddings import init_embeddings
from vectorstore import load_all_vector_stores
from retriever import create_combined_retriever
from chain import init_conversational_chain

# Disable parallelism warnings from tokenizers
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Maximum number of characters of a source document shown as its preview.
PREVIEW_LENGTH = 150

CUSTOM_CSS = """
.source-box {
    margin: 5px;
    padding: 10px;
    border: 1px solid #ddd;
    border-radius: 5px;
}
.source-box:hover {
    background-color: #f5f5f5;
    cursor: pointer;
}
"""


def init_llm():
    """
    Initialize the Language Model (LLM) using the ChatGroq class.

    Environment variables are loaded from a .env file first, so any settings
    ChatGroq reads from the environment are in place before it is constructed.

    Returns:
        A ChatGroq instance created with its default arguments.
    """
    load_dotenv()
    return ChatGroq()


def setup():
    """
    Set up the QA chain.

    Initializes the embeddings, verifies that the "vector_stores" directory
    exists and is non-empty, loads all vector stores, combines them into a
    single retriever and wires that retriever and the LLM into the
    conversational chain.

    Returns:
        The conversational QA chain returned by init_conversational_chain.

    Raises:
        SystemExit: With exit code 1 when no vector stores are found.
    """
    embeddings = init_embeddings()

    # Check if vector stores exist before trying to load them
    if not os.path.exists("vector_stores") or not os.listdir("vector_stores"):
        print("Vector stores not found. Please run 'build_vectorstore.py' first.")
        sys.exit(1)

    # Load all vector stores and expose them through one combined retriever
    vector_stores = load_all_vector_stores(embeddings)
    retriever = create_combined_retriever(vector_stores)

    llm = init_llm()
    return init_conversational_chain(llm, retriever)


# Set up the QA chain (runs at import time; chatbot() reads this global)
qa_chain = setup()


def format_source_doc(doc):
    """
    Format a source document for display.

    Args:
        doc: A document object exposing `page_content` (str) and `metadata`
            (a mapping that may contain a 'source' key).

    Returns:
        A dictionary with keys "preview" (at most PREVIEW_LENGTH characters,
        followed by "..." only when the content was actually truncated),
        "full_content" and "source" ('Unknown' when the metadata has none).
    """
    content = doc.page_content
    preview = content[:PREVIEW_LENGTH]
    if len(content) > PREVIEW_LENGTH:
        # Only signal truncation when something was really cut off.
        preview += "..."
    return {
        "preview": preview,
        "full_content": content,
        "source": doc.metadata.get('source', 'Unknown'),
    }


def get_chat_history_tuples(history_messages):
    """
    Convert message-style chat history into (user, assistant) tuples.

    Args:
        history_messages: List of message dictionaries with 'role' and
            'content' keys. Roles other than 'user' and 'assistant' are
            ignored.

    Returns:
        List of tuples in the form (user_message, assistant_message). A user
        message that never received a reply is paired with None; an assistant
        message with no preceding user message has None as its first element.
    """
    pairs = []
    pending_user = None  # Most recent user message still waiting for a reply

    for entry in history_messages:
        role = entry['role']
        if role == 'user':
            if pending_user is not None:
                # The previous user message never got an assistant response
                pairs.append((pending_user, None))
            pending_user = entry['content']
        elif role == 'assistant':
            pairs.append((pending_user, entry['content']))
            pending_user = None

    # Keep a trailing user message that has no reply yet
    if pending_user is not None:
        pairs.append((pending_user, None))

    return pairs


def chatbot(message, history):
    """
    Handle one chatbot turn by invoking the QA chain.

    Args:
        message: The user's message.
        history: The chat history as a list of message dictionaries, or None.

    Returns:
        A tuple (answer, source_docs) where `answer` is an assistant message
        dictionary ({"role": "assistant", "content": ...}) and `source_docs`
        is a list of dictionaries produced by format_source_doc.
    """
    chat_history = get_chat_history_tuples(history or [])

    # Invoke the QA chain with the tuple-formatted history
    response = qa_chain.invoke({
        "question": message,
        "chat_history": chat_history
    })

    answer = {
        "role": "assistant",
        "content": response["answer"]
    }
    source_docs = [format_source_doc(doc) for doc in response["source_documents"]]

    return answer, source_docs


def process_message(message, history, current_sources=None):
    """
    Process the user's message and refresh the chat and source widgets.

    Args:
        message: The user's message.
        history: The current chat history (list of message dictionaries or
            None). It is copied, not modified in place.
        current_sources: The source list currently held in the UI state; it is
            returned untouched when the message is blank.

    Returns:
        A tuple of (updated chat history, dropdown update, source list to
        store in state). For a blank message the history and sources are
        returned unchanged together with a no-op dropdown update, and the QA
        chain is not invoked.
    """
    history = list(history or [])

    # Do not spend a chain call on empty or whitespace-only input
    if not message or not message.strip():
        return history, gr.update(), current_sources

    answer, sources = chatbot(message, history)

    # Append the new user message and assistant response to history
    history.append({"role": "user", "content": message})
    history.append(answer)

    # Labels start with "<n>. " so show_popup can recover the index from them
    source_options = [
        f"{idx+1}. {source['source']} - {source['preview'][:30]}..."
        for idx, source in enumerate(sources)
    ]

    return history, gr.update(choices=source_options, value=None), sources


def show_popup(selected_option, source_data_state):
    """
    Display the full content of the selected source document.

    Args:
        selected_option: The selected dropdown label ("<n>. <source> - ..."),
            or None when nothing is selected.
        source_data_state: The list of source dictionaries stored in state.

    Returns:
        An update object for the popup textbox: visible with the document's
        source and full content, or hidden when there is no selection, no
        stored sources, or the label does not map to a stored source.
    """
    hidden = gr.update(visible=False)

    if not selected_option or not source_data_state:
        return hidden

    # Extract the 1-based index that prefixes the label
    index_text = selected_option.partition('.')[0]
    try:
        idx = int(index_text) - 1
    except ValueError:
        return hidden

    if not 0 <= idx < len(source_data_state):
        return hidden

    source = source_data_state[idx]
    full_content = f"Source: {source['source']}\n\n{source['full_content']}"
    return gr.update(value=full_content, visible=True)


# Define the Gradio Blocks interface
with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.Markdown("# Lang-Chat Chatbot")

    with gr.Row():
        with gr.Column(scale=7):
            # Chat history component
            chatbot_component = gr.Chatbot(
                label="Chat History",
                height=500,
                bubble_full_width=False,
                type="messages"
            )
            with gr.Row():
                # Input textbox for user messages
                msg = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask me anything about LangChain...",
                    scale=8
                )
                # Submit button
                submit = gr.Button("Send", scale=1)

        with gr.Column(scale=3):
            gr.Markdown("### Source Documents")
            # Dropdown to select source documents
            source_dropdown = gr.Dropdown(
                label="Select a Source Document",
                interactive=True
            )
            # Textbox to display full content of the selected document
            popup = gr.Textbox(
                label="Document Details",
                interactive=False,
                visible=False,
                lines=10
            )

    # Hidden state to store source data
    source_data_state = gr.State()

    # Pressing Enter in the textbox and clicking the button do the same thing
    for trigger in (msg.submit, submit.click):
        trigger(
            process_message,
            [msg, chatbot_component, source_data_state],
            [chatbot_component, source_dropdown, source_data_state]
        )

    # Define the change action for the dropdown
    source_dropdown.change(
        show_popup,
        inputs=[source_dropdown, source_data_state],
        outputs=popup
    )

# Launch the Gradio interface
demo.launch()