"""Gradio chatbot answering Morocco tourism questions with a RAG pipeline.

At import time the script loads ``./tourisme_chatbot.json``, samples a fraction
of its rows, indexes a text rendering of each sampled row in a persistent
Chroma collection, and wires a retriever + Groq chat model into a LangChain
chain.  Running the file as a script launches the Gradio UI.
"""

import hashlib
import os
from collections.abc import Iterator

import gradio as gr
import pandas as pd
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

# Fraction of the dataset that gets indexed (0.2 == 20% of the rows).
SAMPLE_FRACTION = 0.2
# Only the leading columns of each row are rendered into the context text.
CONTEXT_COLUMN_COUNT = 4


def _build_context_rows(
    frame: pd.DataFrame, max_columns: int = CONTEXT_COLUMN_COUNT
) -> list[str]:
    """Render each row of *frame* as ``"col: value col: value ... "``.

    Only the first *max_columns* columns are used.  Slicing (instead of
    indexing columns 0..3 one by one) keeps this working when the dataset has
    fewer columns than *max_columns*.
    """
    leading = frame.iloc[:, :max_columns]
    labels = [str(column) for column in leading.columns]
    return [
        # Each field keeps its trailing space, matching the indexed text format.
        "".join(f"{label}: {value!s} " for label, value in zip(labels, row))
        for row in leading.itertuples(index=False, name=None)
    ]


def _content_id(text: str) -> str:
    """Return a deterministic id for *text* (SHA-256 hex digest of its UTF-8 bytes)."""
    return hashlib.sha256(text.encode("utf-8")).hexdigest()


def _format_docs(docs) -> str:
    """Join the ``page_content`` of the retrieved documents into one string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Read JSON file
df = pd.read_json("./tourisme_chatbot.json")

# Randomly sample a portion of the dataset; the fixed seed makes the sample
# reproducible across runs.
sampled_df = df.sample(frac=SAMPLE_FRACTION, random_state=42)

context_data = _build_context_rows(sampled_df)

# Get the secret key from the environment
groq_api_key = os.environ.get('groq_api_keys')

# Initialize LLM (Groq)
llm = ChatGroq(model="llama-3.1-70b-versatile", api_key=groq_api_key)

# Initialize Embedding Model (HuggingFace)
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

# Create Vector Store (persisted in the current working directory)
vectorstore = Chroma(
    collection_name="tourism_dataset_store",
    embedding_function=embed_model,
    persist_directory="./",
)

# Add sampled data to the vector store.  The collection is persisted on disk,
# so adding the texts with auto-generated ids on every start would store the
# same documents again and again.  Content-derived ids make the insert
# repeatable; the dict also drops identical texts within this batch, since
# ids inside one batch must be unique.
_texts_by_id = {_content_id(text): text for text in context_data}
if _texts_by_id:
    vectorstore.add_texts(list(_texts_by_id.values()), ids=list(_texts_by_id))

# Set up the retriever
retriever = vectorstore.as_retriever()

# Define prompt template
template = (
    "You are a Moroccan tourism expert. Use the provided context to answer the question. "
    "If you don't know the answer, say so. Explain your answer in detail. "
    "Do not discuss the context in your response; just provide the answer directly. \n"
    "Context: {context} Question: {question} Answer:"
)

rag_prompt = PromptTemplate.from_template(template)

# Set up the RAG chain.  The retrieved documents are flattened to plain text
# before they reach the prompt; otherwise the prompt would contain the repr of
# a list of Document objects rather than their content.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)


def rag_memory_stream(text: str) -> Iterator[str]:
    """Stream the answer to *text*, yielding the accumulated answer so far.

    Each yielded value is the full partial answer (not just the newest chunk),
    so the consumer can simply display the latest value.
    """
    partial_text = ""
    for new_text in rag_chain.stream(text):
        partial_text += new_text
        yield partial_text


# Gradio Interface setup
examples = [
    'Tourist attraction sites in Morocco',
    'What are some fun activities to do in Morocco?',
]

title = "Real-time AI App with Groq API and LangChain to Answer Morocco Tourism questions"

demo = gr.Interface(
    title=title,
    fn=rag_memory_stream,
    inputs="text",
    outputs="text",
    examples=examples,
    allow_flagging="never",
)

if __name__ == '__main__':
    demo.launch(share=True)