# Importing Python libraries
import os
import asyncio
from dotenv import load_dotenv
import chainlit as cl
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import Qdrant
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken

# Load environment variables from a .env file
load_dotenv()


@cl.on_chat_start
async def start_chat():
    # Notify the user that the system is setting up the vector store
    await cl.Message(content="Setting up Qdrant vector store. Please wait...").send()

    # Load documents using PyMuPDFLoader from the specified URL
    docs = PyMuPDFLoader(
        "https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf"
    ).load()

    # Define a function to calculate the token length using tiktoken
    def tiktoken_len(text):
        tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(text)
        return len(tokens)

    # Configure a text splitter that handles large documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,  # No overlap between adjacent chunks
        length_function=tiktoken_len,
    )

    # Split the document into manageable chunks
    split_chunks = text_splitter.split_documents(docs)

    # Set up the embedding model for document encoding
    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

    # Asynchronously create a Qdrant vector store with the document chunks
    qdrant_vectorstore = await cl.make_async(Qdrant.from_documents)(
        split_chunks,
        embedding_model,
        location=":memory:",  # Use in-memory storage for vectors
        collection_name="meta_10k",  # Name of the collection in Qdrant
    )

    # Initialize a retriever from the Qdrant vector store
    qdrant_retriever = qdrant_vectorstore.as_retriever()

    # Notify the user that setup is complete
    await cl.Message(content="Qdrant setup complete. You can now start asking questions!").send()

    # Initialize a message history to track the conversation
    message_history = ChatMessageHistory()

    # Set up memory to hold the conversation context and return answers
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Configure the LLM for generating responses
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)

    # Create a retrieval chain combining the LLM and the retriever
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=qdrant_retriever,
        chain_type="stuff",  # "stuff" places all retrieved chunks into a single prompt
        memory=memory,
        return_source_documents=True,
    )

    # Store the configured chain in the user session
    cl.user_session.set("chain", chain)


@cl.on_message
async def main(message: cl.Message):
    # Retrieve the conversational chain from the user session
    chain = cl.user_session.get("chain")

    # Define a callback handler for asynchronous operations
    cb = cl.AsyncLangchainCallbackHandler()

    # Process the incoming message using the conversational chain
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]  # Extract the answer from the response

    # Send the processed answer back to the user
    await cl.Message(content=answer).send()
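
# A minimal sketch of how this script is typically launched, assuming it is saved
# as app.py (the filename is an assumption) and OPENAI_API_KEY is set in the .env file:
#
#   chainlit run app.py -w
#
# The -w flag enables auto-reload while editing the script.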