"""Flask-SocketIO chat server that answers Arabic questions with a RAG chain.

At import time the module loads two FAISS indexes and a pickled keyword
retriever from the working directory, merges the FAISS indexes, and builds a
LangChain pipeline (retriever -> prompt -> Groq LLM -> string parser). Socket
clients send ``message`` events carrying a ``question``; the answer is
streamed back chunk by chunk on the ``response`` event.
"""
import eventlet

# Patch the standard library before anything else is imported so that the
# modules imported below pick up eventlet's cooperative versions.
eventlet.monkey_patch()

import logging
import os
import pickle

from dotenv import load_dotenv
from flask import Flask, request, render_template
from flask_cors import CORS
from flask_socketio import SocketIO, emit
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers import EnsembleRetriever
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain import hub

logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv(".env")

USER_AGENT = os.getenv("USER_AGENT")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
SECRET_KEY = os.getenv("SECRET_KEY")
SESSION_ID_DEFAULT = "abc123"

# Fail with an explicit message instead of the opaque
# "TypeError: str expected, not NoneType" that assigning None to os.environ
# would raise.
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set; define it in the environment or in .env."
    )
if not SECRET_KEY:
    logger.warning("SECRET_KEY is not set; Flask sessions cannot be signed.")

# Set environment variables
if USER_AGENT:
    # os.environ only accepts strings, so export USER_AGENT only when present.
    os.environ["USER_AGENT"] = USER_AGENT
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Initialize Flask app and SocketIO with CORS
app = Flask(__name__)
CORS(app)
# NOTE(review): cors_allowed_origins="*" accepts socket connections from any
# origin — confirm this is intended outside development.
socketio = SocketIO(app, cors_allowed_origins="*")

app.config["SESSION_COOKIE_SECURE"] = True  # Use HTTPS
app.config["SESSION_COOKIE_HTTPONLY"] = True
app.config["SECRET_KEY"] = SECRET_KEY

embed_model = HuggingFaceEmbeddings(
    model_name="Alibaba-NLP/gte-multilingual-base",
    model_kwargs={"trust_remote_code": True},
)

llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0.0,
    max_tokens=1024,
    max_retries=2,
)

# SECURITY: allow_dangerous_deserialization=True makes FAISS.load_local
# unpickle the index metadata. Only load index folders this project produced.
excel_vectorstore = FAISS.load_local(
    folder_path="./faiss_excel_doc_index",
    embeddings=embed_model,
    allow_dangerous_deserialization=True,
)
word_vectorstore = FAISS.load_local(
    folder_path="./faiss_recursive_split_word_doc_index",
    embeddings=embed_model,
    allow_dangerous_deserialization=True,
)

# merge_from mutates excel_vectorstore in place; the second name is an alias
# for the merged store, not a copy.
excel_vectorstore.merge_from(word_vectorstore)
combined_vectorstore = excel_vectorstore

# SECURITY: pickle.load executes arbitrary code embedded in the file. This
# must stay a trusted, locally generated artifact.
with open("combined_recursive_keyword_retriever.pkl", "rb") as f:
    combined_keyword_retriever = pickle.load(f)
# combined_keyword_retriever.k = 1000

semantic_retriever = combined_vectorstore.as_retriever(
    search_type="mmr", search_kwargs={"k": 100}
)

# initialize the ensemble retriever
# NOTE(review): ensemble_retriever is built but never referenced by rag_chain
# below, which retrieves through compression_retriever over
# semantic_retriever only — confirm whether the ensemble was meant to be the
# base retriever.
ensemble_retriever = EnsembleRetriever(
    retrievers=[combined_keyword_retriever, semantic_retriever],
    weights=[0.5, 0.5],
)

embeddings_filter = EmbeddingsFilter(
    embeddings=embed_model, similarity_threshold=0.4
)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter,
    base_retriever=semantic_retriever,
)

# The HTML tag names below were missing from the prompt (e.g. "Apply for bold
# texts"), which left the formatting rules meaningless to the model. They are
# restored from the surrounding wording.
# NOTE(review): confirm the exact tags against the chat.html front end.
template = """
You are an Arabic AI Assistant focused on providing clear, detailed responses in HTML format with appropriate direction for the Arabic language (right-to-left).

- Always answer truthfully. If the user query is irrelevant to the provided CONTEXT, respond by stating why.
- For general questions like greetings, reply with formal Arabic greetings.
- Generate responses in Arabic, and format any English words and numbers appropriately for clarity.

Response Formatting Guidelines:
- All responses must be generated in HTML and wrapped inside a <div dir="rtl"> tag.
- Utilize proper HTML tags for structuring the response:
    - Use <p> for paragraphs.
    - Apply <strong> for bold texts.
    - Organize content with ordered (<ol>) or unordered (<ul>) lists as needed.
    - Include line breaks (<br>) where appropriate for readability.
- This is important - Numbers with decimal values should be rounded off to two decimal places.
- This is important - Wrap all English words, numbers, dates, or sentences in a <span dir="ltr"> tag to maintain left-to-right directionality.

Additional Instructions:
- Provide detailed yet concise answers, covering all important aspects.
- Ensure proper HTML formatting is applied to the entire response for clarity and structure.
- Only return the AI-generated answer in HTML format.
- Responding outside the provided CONTEXT may result in the termination of the interaction.

CONTEXT: {context}

Query: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()


def format_docs(docs):
    """Join the ``page_content`` of each document with blank lines between."""
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming question is passed through unchanged as "question" and is also
# fed to the retriever, whose documents are flattened into "context".
rag_chain = (
    {
        "context": compression_retriever.with_config(run_name="Docs") | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | output_parser
)


# Function to handle WebSocket connection
@socketio.on('connect')
def handle_connect():
    """Acknowledge a new socket connection to the connecting client."""
    emit('connection_response', {'message': 'Connected successfully.'}, room=request.sid)


@socketio.on('ping')
def handle_ping(data):
    """Reply to a client ``ping`` event; the payload is ignored."""
    emit('ping_response', {'message': 'Healthy Connection.'}, room=request.sid)


# Function to handle WebSocket disconnection
@socketio.on('disconnect')
def handle_disconnect():
    """Emit a farewell ``connection_response`` when a client disconnects."""
    emit('connection_response', {'message': 'Disconnected successfully.'})


# Function to handle WebSocket messages
@socketio.on('message')
def handle_message(data):
    """Stream the RAG answer for ``data['question']`` to the sender.

    Each chunk produced by the chain is emitted on the ``response`` event.
    A payload that is not a dict, or whose ``question`` is not a non-empty
    string, gets an ``{"error": ...}`` response instead of raising. Failures
    inside the chain are logged and reported with a generic error message.
    """
    # Raw string payloads have no .get(); previously this raised an unhandled
    # AttributeError outside the try block.
    question = data.get('question') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        emit('response', {"error": "A non-empty 'question' is required."}, room=request.sid)
        return

    try:
        for chunk in rag_chain.stream(question):
            emit('response', chunk, room=request.sid)
    except Exception:
        # Boundary handler: keep the socket alive, but record the traceback
        # instead of silently discarding it.
        logger.exception("Failed to answer question for sid=%s", request.sid)
        emit('response', {"error": "An error occurred while processing your request."}, room=request.sid)


# Home route
@app.route("/")
def index_view():
    """Serve the chat page."""
    return render_template('chat.html')


# Main function to run the app
if __name__ == '__main__':
    # Debug mode was hard-coded to True. It is now opt-in via FLASK_DEBUG so a
    # deployment started from this entry point does not run in debug mode by
    # default.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    socketio.run(app, debug=debug_enabled)