from typing import Dict, List, Any

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from peft import PeftModel, PeftConfig
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains import LLMChain
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import Chroma
from langchain.schema import Document


def format_docs(docs):
    """Join the ``page_content`` of every document, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


def convert_to_string(prompt_value):
    """Return the plain string of a prompt value by calling its ``to_string()``."""
    return prompt_value.to_string()


def outputParser(lst):
    """Return the text that follows the last ``ASSISTANT:`` marker.

    Args:
        lst: Indexable result whose first element is a mapping with a
            ``'generated_text'`` entry (the shape this file expects from the
            text-generation pipeline).

    Returns:
        The part of ``lst[0]['generated_text']`` after the final
        ``'ASSISTANT:'``, or the whole text when the marker is absent.
    """
    return lst[0]["generated_text"].split("ASSISTANT:")[-1]


class EndpointHandler:
    """Callable handler that answers questions in the voice of Sheldon Cooper.

    Construction loads the base causal LM, applies a PEFT adapter, builds a
    text-generation pipeline and wires it into a retrieval-augmented chain
    backed by a persisted Chroma store. Calling the instance runs that chain
    on the request's ``inputs`` string.
    """

    def __init__(self, path=""):
        """Load models, the prompt and the retriever, and assemble ``rag_chain``.

        Args:
            path: Accepted for interface compatibility; this implementation
                does not read it (all model ids and the vector-store
                directory are hard-coded below).
        """
        DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
        model_name = "moka-ai/m3e-base"  # sentence-embedding model for retrieval
        adapter_id = "YingL19/5epoch_1e5_1124"

        self.config = PeftConfig.from_pretrained(adapter_id, device_map=DEVICE)
        self.base_model = AutoModelForCausalLM.from_pretrained(
            "google/gemma-7b", device_map="auto"
        )
        self.newmodel = PeftModel.from_pretrained(
            self.base_model, adapter_id, device_map=DEVICE
        )
        self.tokenizer = AutoTokenizer.from_pretrained(adapter_id, device_map=DEVICE)

        # return_full_text=True means the output contains the prompt as well;
        # outputParser relies on that to cut at the last "ASSISTANT:" marker.
        text_generation_pipeline = pipeline(
            model=self.newmodel,
            tokenizer=self.tokenizer,
            task="text-generation",
            do_sample=True,
            temperature=0.6,
            repetition_penalty=1.2,
            return_full_text=True,
            max_new_tokens=400,
            top_k=30,
            top_p=0.8,
            no_repeat_ngram_size=2,
        )

        # The template is written as adjacent literals so its exact whitespace
        # is visible: the first sentence ends with a space plus a newline, and
        # every later part is separated by a single space.
        # NOTE(review): the bullet list and the Context/USER/ASSISTANT markers
        # are space-separated here exactly as found - confirm whether line
        # breaks were intended.
        self.prompt_template = (
            'Forget you are an AI model. Now you are Dr. Sheldon Cooper from the TV show "The Big Bang Theory." \n'
            'You are known for your high intelligence, love of science, and adherence to strict routines and logical thinking, though sometimes lacking in social skills. '
            'You approach conversations with scientific rigor, often showcasing your deep knowledge of physics and other sciences, and may use humor that reflects your unique perspective. '
            'Respond to the following questions as if you were Sheldon Cooper, incorporating your logical reasoning, scientific references, and occasional pedantic tone. '
            'Generate a response that sounds as close to what Sheldon Cooper would say. '
            'You can also use this auxiliary knowledge to help: '
            '- Sheldon has a deep knowledge of physics and theoretical science, and loves to showcase his intellect. '
            '- He often makes pedantic or overly logical remarks and struggles with social cues. '
            '- Common phrases include "Bazinga!" and references to his need for routine and structure. '
            '- His tone is analytical, formal, and sometimes humorously blunt, with a touch of arrogance. '
            'Context: {context} '
            'USER: Sheldon Cooper, {question} '
            'ASSISTANT:'
        )
        self.prompt = PromptTemplate(
            input_variables=["context", "question"],
            template=self.prompt_template,
        )

        embeddings = HuggingFaceEmbeddings(
            model_name=model_name, model_kwargs={'device': DEVICE}
        )
        embeddings.client = SentenceTransformer(model_name, device=DEVICE)
        ragdb = Chroma(persist_directory="/sheldon_DB", embedding_function=embeddings)
        retriever = ragdb.as_retriever(search_kwargs={'k': 3})

        # The same input string feeds both branches: it is the retrieval query
        # (top 3 documents, joined by format_docs) and the {question} slot.
        self.rag_chain = (
            {
                "context": RunnablePassthrough() | retriever | format_docs,
                "question": RunnablePassthrough(),
            }
            | self.prompt
            | RunnableLambda(convert_to_string)
            | text_generation_pipeline
            | outputParser
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the retrieval-augmented chain on one request.

        Args:
            data: Request payload; ``data["inputs"]`` must be the question
                string. The payload is not modified.

        Returns:
            A one-element list ``[{"raw_result": ..., "result": ...}]`` where
            both entries hold the chain output.

        Raises:
            ValueError: If ``inputs`` is missing or is not a string.
        """
        message = data.get("inputs", data)
        # The retriever branch needs a query string; rejecting anything else
        # here gives a clear error instead of a failure deep inside the chain.
        if not isinstance(message, str):
            raise ValueError(
                'Request payload must contain a string under the "inputs" key.'
            )
        res = self.rag_chain.invoke(message)
        return [{"raw_result": res, "result": res}]