# Chat server setup: Flask + Socket.IO front end over a LangChain RAG pipeline
# (FAISS vector stores + Groq-hosted LLM).
import eventlet

# Patch the standard library for eventlet before the remaining imports run,
# so the libraries loaded below see the patched modules.
eventlet.monkey_patch()

from dotenv import load_dotenv
from flask import Flask, request, render_template
from flask_cors import CORS
from flask_socketio import SocketIO, emit
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers import EnsembleRetriever
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain import hub
import pickle
import os

# Load environment variables
load_dotenv(".env")
USER_AGENT = os.getenv("USER_AGENT")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
SECRET_KEY = os.getenv("SECRET_KEY")
SESSION_ID_DEFAULT = "abc123"

# Set environment variables. os.environ only accepts str values, so a variable
# that is missing from the environment is skipped instead of raising TypeError
# at import time.
if USER_AGENT is not None:
    os.environ['USER_AGENT'] = USER_AGENT
if GROQ_API_KEY is not None:
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY
os.environ["TOKENIZERS_PARALLELISM"] = 'true'

# Initialize Flask app and SocketIO with CORS
app = Flask(__name__)
CORS(app)
# NOTE(review): "*" accepts Socket.IO connections from any origin; confirm this
# is intended outside local development.
socketio = SocketIO(app, cors_allowed_origins="*")
app.config['SESSION_COOKIE_SECURE'] = True  # Use HTTPS
app.config['SESSION_COOKIE_HTTPONLY'] = True
app.config['SECRET_KEY'] = SECRET_KEY

embed_model = HuggingFaceEmbeddings(
    model_name="Alibaba-NLP/gte-multilingual-base",
    model_kwargs={"trust_remote_code": True},
)

llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0.0,
    max_tokens=1024,
    max_retries=2
)

# SECURITY NOTE: allow_dangerous_deserialization=True lets FAISS.load_local
# unpickle the stored index metadata. Only load index folders that this
# project produced itself.
excel_vectorstore = FAISS.load_local(
    folder_path="./faiss_excel_doc_index",
    embeddings=embed_model,
    allow_dangerous_deserialization=True,
)
word_vectorstore = FAISS.load_local(
    folder_path="./faiss_recursive_split_word_doc_index",
    embeddings=embed_model,
    allow_dangerous_deserialization=True,
)
# merge_from mutates excel_vectorstore in place; the alias below just gives the
# merged store a clearer name.
excel_vectorstore.merge_from(word_vectorstore)
combined_vectorstore = excel_vectorstore

# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
# This is only safe while the .pkl is a trusted artifact built by this project.
with open('combined_recursive_keyword_retriever.pkl', 'rb') as f:
    combined_keyword_retriever = pickle.load(f)
# combined_keyword_retriever.k = 1000

semantic_retriever = combined_vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 100})

# initialize the ensemble retriever
# NOTE(review): ensemble_retriever is not referenced again in the visible code;
# compression_retriever below wraps semantic_retriever only. Confirm which base
# retriever is intended.
ensemble_retriever = EnsembleRetriever(
    retrievers=[combined_keyword_retriever, semantic_retriever],
    weights=[0.5, 0.5]
)

embeddings_filter = EmbeddingsFilter(embeddings=embed_model, similarity_threshold=0.4)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter,
    base_retriever=semantic_retriever
)

# NOTE(review): the formatting guidelines in this prompt read as if the HTML
# tag names were stripped out (e.g. "wrapped inside a", "Apply for bold
# texts", "ordered ()"). The text is kept exactly as found; restore the tags
# from the original prompt before relying on it.
template = """ You are an Arabic AI Assistant focused on providing clear, detailed responses in HTML format with appropriate direction for the Arabic language (right-to-left). - Always answer truthfully. If the user query is irrelevant to the provided CONTEXT, respond by stating why. - For general questions like greetings, reply with formal Arabic greetings. - Generate responses in Arabic, and format any English words and numbers appropriately for clarity. Response Formatting Guidelines: - All responses must be generated in HTML and wrapped inside a
for paragraphs.
- Apply for bold texts.
- Organize content with ordered () or unordered (
) lists as needed.
- Include line breaks (
) where appropriate for readability.
- This is important - Numbers with decimal values should be rounded off to two decimal places.
- This is important - Wrap all English words, numbers, dates, or sentences in a tag to maintain left-to-right directionality.
Additional Instructions:
- Provide detailed yet concise answers, covering all important aspects.
- Ensure proper HTML formatting is applied to the entire response for clarity and structure.
- Only return the AI-generated answer in HTML format.
- Responding outside the provided CONTEXT may result in the termination of the interaction.
CONTEXT: {context}
Query: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()
def format_docs(docs) -> str:
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)
# Assemble the RAG pipeline in named steps: retrieve -> prompt -> model -> text.
# "context" is the retrieved documents flattened by format_docs; "question"
# forwards the chain input unchanged.
_context_step = compression_retriever.with_config(run_name="Docs") | format_docs
_chain_inputs = {"context": _context_step, "question": RunnablePassthrough()}
rag_chain = _chain_inputs | prompt | llm | output_parser
# Function to handle WebSocket connection
@socketio.on('connect')
def handle_connect():
    """Acknowledge a new Socket.IO connection to the client that opened it."""
    emit('connection_response', {'message': 'Connected successfully.'}, room=request.sid)
@socketio.on('ping')
def handle_ping(data=None):
    """Reply to a client health-check ping.

    Args:
        data: Optional event payload; it is not inspected. It defaults to
            None so a ping sent without a payload does not fail the handler
            call with a TypeError.
    """
    emit('ping_response', {'message': 'Healthy Connection.'}, room=request.sid)
# Function to handle WebSocket disconnection
@socketio.on('disconnect')
def handle_disconnect():
    """Emit a disconnect acknowledgement addressed to the departing client."""
    # The recipient is named explicitly, like the other handlers in this file.
    # NOTE(review): the client has already disconnected when this runs, so the
    # message is presumably never delivered; confirm whether it is needed.
    emit('connection_response', {'message': 'Disconnected successfully.'}, room=request.sid)
# Function to handle WebSocket messages
@socketio.on('message')
def handle_message(data):
    """Stream the RAG chain's answer to the client that asked the question.

    Args:
        data: Event payload; expected to be a dict with a non-empty string
            under the 'question' key.

    Emits one 'response' event per chunk yielded by ``rag_chain.stream``. If
    the payload is unusable or the chain fails, a single 'response' event
    carrying an ``{"error": ...}`` dict is emitted instead.
    """
    # Validate up front: a non-dict payload used to raise AttributeError
    # before the try block was ever entered.
    question = data.get('question') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        emit('response', {"error": "A non-empty 'question' is required."}, room=request.sid)
        return

    try:
        for chunk in rag_chain.stream(question):
            emit('response', chunk, room=request.sid)
    except Exception:
        # Boundary for this event: record the traceback for operators, and
        # send the client only a generic message.
        app.logger.exception("Failed to answer question")
        emit('response', {"error": "An error occurred while processing your request."}, room=request.sid)
# Home route
@app.route("/")
def index_view():
    """Serve the chat page by rendering the 'chat.html' template."""
    return render_template('chat.html')
# Main function to run the app
if __name__ == '__main__':
    # Debug mode stays on by default, as before, but can now be switched off
    # without a code change by setting FLASK_DEBUG to a false-like value
    # (e.g. "0"). Debug mode should not be enabled in production.
    debug_enabled = os.getenv("FLASK_DEBUG", "true").strip().lower() in {"1", "true", "yes", "on"}
    socketio.run(app, debug=debug_enabled)