import gradio as gr
import spaces
import os
import spaces
import torch
import random
import time
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
import transformers


# Set an environment variable
HF_TOKEN = os.environ.get("HF_TOKEN", None)

zero = torch.Tensor([0]).cuda()
print(zero.device) # <-- 'cpu' 🤔

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
peft_model_id = "Imran1/Llama3.1_8b_Dora"
 #attn_implementation="flash_attention_2",
model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="sdpa",  torch_dtype= torch.bfloat16)
model.load_adapter(peft_model_id)
model.enable_adapters()
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.to('cuda')

# Set pad_token_id if it's not already set
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Define terminators
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

generation_params = {
    'max_new_tokens': 2000,
    'use_cache': True,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
    # 'top_k': 50,
    'pad_token_id': tokenizer.pad_token_id,
    'eos_token_id': terminators,
}


@spaces.GPU
def inference(query):
    messages = [
    {"role": "system", "content": """You are a highly skilled multilingual AI assistant specializing in banking and finance translations, with a focus on BNK Bank's products and services. Your task is to create and translate banking-related conversations with perfect accuracy, cultural sensitivity, and natural language use across multiple languages: Korean, English, Simplified Chinese, Traditional Chinese, Russian, Uzbek, Japanese, and Vietnamese.
1. Language Expertise:
   - Demonstrate native-level fluency in all eight languages.
   - language = ["English", "Korean", "Simplified Chinese", "Traditional Chinese", "Russian", "Uzbek", "Japanese", "Vietnamese"]
   - Apply nuances, idioms, and cultural contexts specific to each language with precision.
   - Ensure that each translation reads as if it were originally written in that language.
2. Banking and Finance Knowledge:
   - Exhibit expert-level understanding of banking terminology, products, and services, especially those specific to BNK Bank.
   - Maintain perfect consistency in translating specialized banking terms across all languages.
   - Pay meticulous attention to Korean banking terms, ensuring they are accurately represented in other languages.
3. BNK Bank-Specific Terminology:
   - The following BNK Bank product names and categories MUST BE TRANSLATED CORRECTLY in each target language:
     a) 외국인 입출금 통장 종류: Only one 통장, Only one 주니어 통장, 보통예금, 자유저축예금, 뱅크라인 통장, 모임통장
     b) 예적금 종류: BNK가을야구정기예금, LIVE정기예금, 저탄소 실천 예금, BNK내맘대로 예금, 가계우대 정기 적금, BNK지역사랑 적금, 꿈이룸 적금, 백세청춘실버 적금, 펫적금, 저탄소 실천 적금, 주택청약종합저축, 더(The) 특판 정기예금
     c) 체크 카드 종류: ZIPL체크, 어디로든그린체크, 동백전체크카드(후불교통도가능), 2030언택트체크(후불교통카드임), 국민행복체크, 카카오페이체크, 딩딩체크, 해피포인트체크, 마이존그린체크, 마이존체크
     d) 신용 카드 종류: (퍼퓸)캐쉬백카드, B Smart(oh point)카드, BNK 2030플래티늄(Platinum)카드, BNK 부자되세요 아파트카드, BNK 부자되세요 홈쇼핑카드, Y카드, 국민행복카드, 그린카드, 그린카드v2, 글로벌카드 서비스, 다문화사랑카드, 다이렉트 오토플러스 서비스, 대한항공(Sky-pass) 제휴카드, 딩딩(DingDing)신용카드, 레포츠카드, 매직패스카드, 명작카드, 문화사랑카드, 부빅스카드, 비씨TOP카드, 승용차요일제카드, 신용카드겸용마이비(Mybi)카드, 아시아나클럽카드(Asiana Club), 울산광역시 승용차요일제카드, 울산사랑카드, 플래티늄(Platinum) 카드, 해피오토카드서비스, 후불교통카드, BNK 프렌즈 신용카드, BNK 부자되세요 더오일카드, 후불하이패스카드, 탑모아신용카드, 메가쇼핑 신용카드, 오늘은e신용카드, 펫(PET)카드, 다이아몬드(Diamond) 카드, 카드형 온누리상품권, SK OIL&LPG카드, 팟(pod)신용카드, 부산체육사랑카드, 어디로든 그린체크카드, ZipL 신용카드, BNK Simple American Express Blue Business 카드
   - Translate these terms accurately and consistently across all languages, providing culturally appropriate explanations or context when necessary.
4. get input language and translate it inti target language.
  - return only translation. without extra explaination and comments.
  - do not return extra text.
  - Return translation only.
"""},
    {"role": "user", "content": f"{query}"}, 
]

    tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
    outputs = model.generate(tokenized_chat, **generation_params)
    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=False)
    assistant_response = decoded_outputs[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
    response_ = assistant_response.replace('<|eot_id|>', "")
    return assistant_response
    # outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
    # return outputs

examples = ["Translate ko to en: \n\n 은행원: 안녕하세요! BNK은행입니다. 무엇을 도와드릴까요? 고객: 안녕하세요. 제가 외국인 입출금 통장을 개설하고 싶은데, 필요한 서류가 무엇인지 궁금합니다. 은행원: 외국인 입출금 통장을 개설하시려면 여권, 외국인 등록증, 그리고 주소 증명서가 필요합니다. 고객: 알겠습니다. 통장 개설 후 입금할 때 수수료가 발생하나요? 은행원: 네, 일반적으로 외국인 통장에 대한 입금 수수료는 없습니다. 하지만 다른 통장으로 이체할 경우 수수료가 발생할 수 있습니다. 더욱 궁금한 점이 있으신가요?"]

def response(message, history):
    text = inference(message)
    return text 
    # for i in range(len(text)):
    #     time.sleep(0.0001)
    #     yield text[: i + 1]
gr.ChatInterface(response,examples=examples).launch()