import eventlet
eventlet.monkey_patch()


from dotenv import load_dotenv
from flask import Flask, request, render_template
from flask_cors import CORS
from flask_socketio import SocketIO, emit

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers import EnsembleRetriever
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain import hub
import pickle
import os


# Load environment variables
# Values come from the process environment first, then from the local .env
# file (load_dotenv does not override variables that are already set).
load_dotenv(".env")
USER_AGENT = os.getenv("USER_AGENT")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
SECRET_KEY = os.getenv("SECRET_KEY")
SESSION_ID_DEFAULT = "abc123"


# Set environment variables
# os.environ only accepts strings: assigning None (variable absent from both
# the process environment and .env) raises an opaque TypeError at import time,
# so each value is checked before it is written back.
if USER_AGENT is not None:
    os.environ["USER_AGENT"] = USER_AGENT
if GROQ_API_KEY is None:
    # The Groq chat model created later in this module is given no explicit
    # key, so fail here with an actionable message instead of a TypeError.
    raise RuntimeError(
        "GROQ_API_KEY is not set; define it in the environment or in .env"
    )
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
os.environ["TOKENIZERS_PARALLELISM"] = 'true'

# Build the Flask application, enable CORS on it, and attach Socket.IO
# (Socket.IO accepts connections from any origin).
app = Flask(__name__)
CORS(app)
socketio = SocketIO(app, cors_allowed_origins="*")

# Session-cookie flags and the signing key, applied in a single call.
app.config.update(
    SESSION_COOKIE_SECURE=True,  # Use HTTPS
    SESSION_COOKIE_HTTPONLY=True,
    SECRET_KEY=SECRET_KEY,
)


# Embedding model shared by the FAISS stores and the embeddings filter below.
# NOTE(review): trust_remote_code=True presumably allows the model repository
# to run its own Python code at load time — confirm the model source is trusted.
embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-multilingual-base", model_kwargs={"trust_remote_code":True})
# Chat model used as the generation step of the RAG chain; temperature 0.0,
# at most 1024 output tokens, and up to 2 retries per request.
llm = ChatGroq(
    model="llama-3.1-8b-instant",  
    temperature=0.0,
    max_tokens=1024, 
    max_retries=2
)

# Load the two prebuilt FAISS indexes from disk and fold the Word-document
# index into the Excel one. merge_from is called for its side effect on
# excel_vectorstore, so combined_vectorstore is the same object under a
# clearer name.
# SECURITY: allow_dangerous_deserialization=True opts in to loading pickled
# index data; these folders must only ever contain files produced by a
# trusted build step.
excel_vectorstore = FAISS.load_local(folder_path="./faiss_excel_doc_index", embeddings=embed_model, allow_dangerous_deserialization=True)
word_vectorstore = FAISS.load_local(folder_path="./faiss_recursive_split_word_doc_index", embeddings=embed_model, allow_dangerous_deserialization=True)
excel_vectorstore.merge_from(word_vectorstore)
combined_vectorstore = excel_vectorstore

# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load a pickle that was generated by a trusted process.
with open('combined_recursive_keyword_retriever.pkl', 'rb') as f:
    combined_keyword_retriever = pickle.load(f)
    # combined_keyword_retriever.k = 1000

# Vector-store retriever over the merged index, using search_type "mmr" with k=100.
semantic_retriever = combined_vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 100})


# initialize the ensemble retriever
# Combines the pickled keyword retriever and the semantic retriever with
# equal weights.
# NOTE(review): ensemble_retriever is not referenced anywhere else in the
# visible file — the compression retriever below is built on
# semantic_retriever alone. Confirm whether base_retriever was meant to be
# ensemble_retriever.
ensemble_retriever = EnsembleRetriever(
    retrievers=[combined_keyword_retriever, semantic_retriever], weights=[0.5, 0.5]
)


# Post-filter for retrieved documents based on embedding similarity, configured
# with a threshold of 0.4, wrapped around the semantic retriever.
embeddings_filter = EmbeddingsFilter(embeddings=embed_model, similarity_threshold=0.4)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter, base_retriever=semantic_retriever
)

# Prompt sent to the LLM for every question. It has two placeholders:
# {context} (the retrieved document text) and {question} (the user's query).
# The text is runtime behavior — edit it deliberately, not cosmetically.
template = """
You are an Arabic AI Assistant focused on providing clear, detailed responses in HTML format with appropriate direction for the Arabic language (right-to-left).

    - Always answer truthfully. If the user query is irrelevant to the provided CONTEXT, respond by stating why.
    - For general questions like greetings, reply with formal Arabic greetings.
    - Generate responses in Arabic, and format any English words and numbers appropriately for clarity.

Response Formatting Guidelines:

    - All responses must be generated in HTML and wrapped inside a <div dir="rtl"> tag.
    - Utilize proper HTML tags for structuring the response:
        - Use <p> for paragraphs.
        - Apply <strong> for bold texts.
        - Organize content with ordered (<ol>) or unordered (<ul>) lists as needed.
        - Include line breaks (<br>) where appropriate for readability.
    - This is important - Numbers with decimal values should be rounded off to two decimal places.
    - This is important - Wrap all English words, numbers, dates, or sentences in a <span dir="ltr"> tag to maintain left-to-right directionality.

Additional Instructions:

    - Provide detailed yet concise answers, covering all important aspects.
    - Ensure proper HTML formatting is applied to the entire response for clarity and structure.
    - Only return the AI-generated answer in HTML format.
    - Responding outside the provided CONTEXT may result in the termination of the interaction.

CONTEXT: {context}
Query: {question}
"""

# Chat prompt built from the template above, and the parser that reduces the
# model's reply to a plain string.
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with the contents in input order, or ``""`` when
        ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# RAG pipeline: the incoming question is sent to the compression retriever
# (whose documents are flattened to text by format_docs) and also passed
# through unchanged; both fill the prompt, which goes to the LLM, and the
# reply is parsed into a plain string.
_docs_retriever = compression_retriever.with_config(run_name="Docs")
_prompt_inputs = {
    "context": _docs_retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = _prompt_inputs | prompt | llm | output_parser

# Function to handle WebSocket connection
@socketio.on('connect')
def handle_connect():
    """Acknowledge a new connection with a message addressed to that client's sid."""
    acknowledgement = {'message': 'Connected successfully.'}
    emit('connection_response', acknowledgement, room=request.sid)

@socketio.on('ping')
def handle_ping(data):
    """Answer a client 'ping' event with a health message; ``data`` is ignored."""
    health_status = {'message': 'Healthy Connection.'}
    emit('ping_response', health_status, room=request.sid)

# Function to handle WebSocket disconnection
@socketio.on('disconnect')
def handle_disconnect():
    """Emit a farewell 'connection_response' when a client disconnects.

    No explicit room is given, so the library's default addressing applies.
    NOTE(review): the client has already disconnected at this point, so it
    presumably never receives this message — confirm it is still wanted.
    """
    farewell = {'message': 'Disconnected successfully.'}
    emit('connection_response', farewell)

# Function to handle WebSocket messages
@socketio.on('message')
def handle_message(data):
    """Stream the RAG chain's answer to the client's question over the socket.

    Args:
        data: Event payload. Expected to be a dict with a non-empty string
            under the ``question`` key; anything else is rejected.

    Emits:
        ``response`` events to the requesting client only: one per streamed
        chunk on success, or a single ``{"error": ...}`` payload when the
        input is invalid or the chain raises.
    """
    # Local import keeps the handler self-contained without touching the
    # module's order-sensitive import block (eventlet is patched first there).
    import logging

    sid = request.sid

    # Validate before touching the chain: a non-dict payload used to raise an
    # unhandled AttributeError, and a missing question was fed to the chain.
    question = data.get('question') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        emit('response', {"error": "A non-empty 'question' is required."}, room=sid)
        return

    try:
        for chunk in rag_chain.stream(question):
            emit('response', chunk, room=sid)
    except Exception:
        # Top-level boundary for this event: record the traceback server-side
        # and send the client only a generic message.
        logging.getLogger(__name__).exception(
            "Failed to answer question for client %s", sid
        )
        emit('response', {"error": "An error occurred while processing your request."}, room=sid)


# Home route
@app.route("/")
def index_view():
    """Render and return the chat page template."""
    chat_page = render_template('chat.html')
    return chat_page

# Main function to run the app
if __name__ == '__main__':
    # Debug mode was hard-coded on; it is now controlled by FLASK_DEBUG so a
    # deployment can switch it off (FLASK_DEBUG=0) without editing code.
    # The default stays "1" to preserve the previous behavior.
    debug_enabled = os.getenv("FLASK_DEBUG", "1").strip().lower() in {"1", "true", "yes", "on"}
    socketio.run(app, debug=debug_enabled)