from typing import Dict, List, Any
import torch
from transformers import pipeline
from peft import PeftModel, PeftConfig
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate,ChatPromptTemplate
from langchain.chains import LLMChain
from langchain_core.runnables import RunnablePassthrough,RunnableLambda
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import Chroma
from langchain.schema import Document

def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined in order, with a blank line
        between consecutive documents. An empty iterable yields ``""``.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

def convert_to_string(prompt_value):
    """Render a prompt value as plain text.

    Args:
        prompt_value: Object exposing a ``to_string()`` method (the chain
            passes the output of the prompt template here).

    Returns:
        Whatever ``prompt_value.to_string()`` returns.
    """
    rendered = prompt_value.to_string()
    return rendered

def outputParser(lst):
    """Extract the assistant's reply from text-generation output.

    Args:
        lst: Sequence whose first element is a mapping with a
            ``'generated_text'`` string.

    Returns:
        The text following the last ``'ASSISTANT:'`` marker in the first
        result, or the entire generated text when the marker is absent.
    """
    first_result = lst[0]
    generated = first_result['generated_text']
    # rpartition puts everything after the LAST marker in the third slot and
    # returns the whole string there when the marker is not found.
    _, _, reply = generated.rpartition('ASSISTANT:')
    return reply

class EndpointHandler():
    """Inference handler that answers questions in a Sheldon Cooper persona.

    Construction loads a PEFT adapter on top of ``google/gemma-7b``, wraps it
    in a text-generation pipeline and wires it into a retrieval-augmented
    chain backed by a persisted Chroma store. Calling the instance runs that
    chain on the request's ``"inputs"`` string.
    """

    def __init__(self, path=""):
        """Load the model, tokenizer, retriever and assemble the RAG chain.

        Args:
            path: Accepted for interface compatibility; this handler does not
                read it. All model ids and the vector-store directory are
                hard-coded below.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        embedding_model_name = "moka-ai/m3e-base"
        adapter_id = "YingL19/5epoch_1e5_1124"

        self.config = PeftConfig.from_pretrained(adapter_id, device_map=device)
        self.base_model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto")
        self.newmodel = PeftModel.from_pretrained(self.base_model, adapter_id, device_map=device)
        self.tokenizer = AutoTokenizer.from_pretrained(adapter_id, device_map=device)

        # return_full_text=True means the generated text still contains the
        # prompt; outputParser strips everything up to the last 'ASSISTANT:'.
        text_generation_pipeline = pipeline(
            model=self.newmodel,
            tokenizer=self.tokenizer,
            task="text-generation",
            do_sample=True,
            temperature=0.6,
            repetition_penalty=1.2,
            return_full_text=True,
            max_new_tokens=400,
            top_k=30,
            top_p=0.8,
            no_repeat_ngram_size=2,
        )

        self.prompt_template = '''Forget you are an AI model. Now you are Dr. Sheldon Cooper from the TV show "The Big Bang Theory." You are known for your high intelligence, love of science, and adherence to strict routines and logical thinking, though sometimes lacking in social skills. You approach conversations with scientific rigor, often showcasing your deep knowledge of physics and other sciences, and may use humor that reflects your unique perspective. Respond to the following questions as if you were Sheldon Cooper, incorporating your logical reasoning, scientific references, and occasional pedantic tone.
Generate a response that sounds as close to what Sheldon Cooper would say. You can also use this auxiliary knowledge to help:
- Sheldon has a deep knowledge of physics and theoretical science, and loves to showcase his intellect.
- He often makes pedantic or overly logical remarks and struggles with social cues.
- Common phrases include "Bazinga!" and references to his need for routine and structure.
- His tone is analytical, formal, and sometimes humorously blunt, with a touch of arrogance.
Context: {context}
USER: Sheldon Cooper, {question}
ASSISTANT:'''

        self.prompt = PromptTemplate(
            input_variables=["context", "question"],
            template=self.prompt_template,
        )

        embeddings = HuggingFaceEmbeddings(
            model_name=embedding_model_name,
            model_kwargs={'device': device},
        )
        # Replace the embedding client with an explicitly constructed
        # SentenceTransformer on the selected device.
        embeddings.client = SentenceTransformer(embedding_model_name, device=device)

        # NOTE(review): this is an absolute path at the filesystem root and
        # `path` is not consulted -- confirm the store is deployed there.
        ragdb = Chroma(persist_directory="/sheldon_DB", embedding_function=embeddings)
        retriever = ragdb.as_retriever(search_kwargs={'k': 3})

        # The leading dict feeds the incoming question to two branches: one
        # retrieves documents and formats them as the prompt's context, the
        # other forwards the question unchanged. The rendered prompt is then
        # converted to a plain string, generated on, and parsed.
        self.rag_chain = (
            {
                "context": RunnablePassthrough() | retriever | format_docs,
                "question": RunnablePassthrough(),
            }
            | self.prompt
            | RunnableLambda(convert_to_string)
            | text_generation_pipeline
            | outputParser
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Answer one question through the RAG chain.

        Args:
            data: Request payload. Its ``"inputs"`` entry must be the question
                as a string; the key is removed from ``data`` while reading.
                No other key is read.

        Returns:
            A one-element list holding a dict whose ``"raw_result"`` and
            ``"result"`` entries both contain the chain's output.

        Raises:
            ValueError: If ``"inputs"`` is missing or is not a string.
        """
        message = data.pop("inputs", None)
        # Fail fast with a clear message rather than letting a dict or other
        # non-string value reach the retriever and fail deep inside the chain.
        if not isinstance(message, str):
            raise ValueError(
                'Request payload must contain an "inputs" entry of type str, '
                f"got {type(message).__name__}."
            )
        res = self.rag_chain.invoke(message)
        return [{"raw_result": res, "result": res}]