File size: 6,555 Bytes
a4c7253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46bc0ec
a4c7253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e3afc2
a4c7253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
import spaces
import os
import torch
import random
import time
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
import transformers


# Hugging Face access token (required for the gated Llama-3 base weights).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Sanity probe: on HF "ZeroGPU" Spaces the CUDA device is only attached inside
# @spaces.GPU-decorated calls, so this may still report 'cpu' at import time.
zero = torch.Tensor([0]).cuda()
print(zero.device)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
peft_model_id = "Imran1/Llama3.1_8b_Dora"
# attn_implementation="flash_attention_2" is a faster alternative when available.
model = AutoModelForCausalLM.from_pretrained(
    model_id, attn_implementation="sdpa", torch_dtype=torch.bfloat16
)
model.load_adapter(peft_model_id)  # attach the DoRA (PEFT) adapter weights
model.enable_adapters()
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.to('cuda')

# Llama-3 tokenizers ship without a pad token; fall back to EOS so generate()
# can pad without warnings.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Stop generation on either the regular EOS or Llama-3's end-of-turn marker.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

generation_params = {
    'max_new_tokens': 2000,
    'use_cache': True,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
    # 'top_k': 50,
    'pad_token_id': tokenizer.pad_token_id,
    'eos_token_id': terminators,
}


@spaces.GPU
def inference(query):
    """Run one translation turn through the DoRA-adapted Llama-3 model.

    Parameters
    ----------
    query : str
        The user's request, e.g. "Translate ko to en: ...".

    Returns
    -------
    str
        The assistant's reply with the trailing ``<|eot_id|>`` marker removed.
    """
    messages = [
    {"role": "system", "content": """You are a highly skilled multilingual AI assistant specializing in banking and finance translations, with a focus on BNK Bank's products and services. Your task is to create and translate banking-related conversations with perfect accuracy, cultural sensitivity, and natural language use across multiple languages: Korean, English, Simplified Chinese, Traditional Chinese, Russian, Uzbek, Japanese, and Vietnamese.
1. Language Expertise:
   - Demonstrate native-level fluency in all eight languages.
   - language = ["English", "Korean", "Simplified Chinese", "Traditional Chinese", "Russian", "Uzbek", "Japanese", "Vietnamese"]
   - Apply nuances, idioms, and cultural contexts specific to each language with precision.
   - Ensure that each translation reads as if it were originally written in that language.
2. Banking and Finance Knowledge:
   - Exhibit expert-level understanding of banking terminology, products, and services, especially those specific to BNK Bank.
   - Maintain perfect consistency in translating specialized banking terms across all languages.
   - Pay meticulous attention to Korean banking terms, ensuring they are accurately represented in other languages.
3. BNK Bank-Specific Terminology:
   - The following BNK Bank product names and categories MUST BE TRANSLATED CORRECTLY in each target language:
     a) ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ ์ข…๋ฅ˜: Only one ํ†ต์žฅ, Only one ์ฃผ๋‹ˆ์–ด ํ†ต์žฅ, ๋ณดํ†ต์˜ˆ๊ธˆ, ์ž์œ ์ €์ถ•์˜ˆ๊ธˆ, ๋ฑ…ํฌ๋ผ์ธ ํ†ต์žฅ, ๋ชจ์ž„ํ†ต์žฅ
     b) ์˜ˆ์ ๊ธˆ ์ข…๋ฅ˜: BNK๊ฐ€์„์•ผ๊ตฌ์ •๊ธฐ์˜ˆ๊ธˆ, LIVE์ •๊ธฐ์˜ˆ๊ธˆ, ์ €ํƒ„์†Œ ์‹ค์ฒœ ์˜ˆ๊ธˆ, BNK๋‚ด๋ง˜๋Œ€๋กœ ์˜ˆ๊ธˆ, ๊ฐ€๊ณ„์šฐ๋Œ€ ์ •๊ธฐ ์ ๊ธˆ, BNK์ง€์—ญ์‚ฌ๋ž‘ ์ ๊ธˆ, ๊ฟˆ์ด๋ฃธ ์ ๊ธˆ, ๋ฐฑ์„ธ์ฒญ์ถ˜์‹ค๋ฒ„ ์ ๊ธˆ, ํŽซ์ ๊ธˆ, ์ €ํƒ„์†Œ ์‹ค์ฒœ ์ ๊ธˆ, ์ฃผํƒ์ฒญ์•ฝ์ข…ํ•ฉ์ €์ถ•, ๋”(The) ํŠนํŒ ์ •๊ธฐ์˜ˆ๊ธˆ
     c) ์ฒดํฌ ์นด๋“œ ์ข…๋ฅ˜: ZIPL์ฒดํฌ, ์–ด๋””๋กœ๋“ ๊ทธ๋ฆฐ์ฒดํฌ, ๋™๋ฐฑ์ „์ฒดํฌ์นด๋“œ(ํ›„๋ถˆ๊ตํ†ต๋„๊ฐ€๋Šฅ), 2030์–ธํƒํŠธ์ฒดํฌ(ํ›„๋ถˆ๊ตํ†ต์นด๋“œ์ž„), ๊ตญ๋ฏผํ–‰๋ณต์ฒดํฌ, ์นด์นด์˜คํŽ˜์ด์ฒดํฌ, ๋”ฉ๋”ฉ์ฒดํฌ, ํ•ดํ”ผํฌ์ธํŠธ์ฒดํฌ, ๋งˆ์ด์กด๊ทธ๋ฆฐ์ฒดํฌ, ๋งˆ์ด์กด์ฒดํฌ
     d) ์‹ ์šฉ ์นด๋“œ ์ข…๋ฅ˜: (ํผํ“ธ)์บ์‰ฌ๋ฐฑ์นด๋“œ, B Smart(oh point)์นด๋“œ, BNK 2030ํ”Œ๋ž˜ํ‹ฐ๋Š„(Platinum)์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ์•„ํŒŒํŠธ์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ํ™ˆ์‡ผํ•‘์นด๋“œ, Y์นด๋“œ, ๊ตญ๋ฏผํ–‰๋ณต์นด๋“œ, ๊ทธ๋ฆฐ์นด๋“œ, ๊ทธ๋ฆฐ์นด๋“œv2, ๊ธ€๋กœ๋ฒŒ์นด๋“œ ์„œ๋น„์Šค, ๋‹ค๋ฌธํ™”์‚ฌ๋ž‘์นด๋“œ, ๋‹ค์ด๋ ‰ํŠธ ์˜คํ† ํ”Œ๋Ÿฌ์Šค ์„œ๋น„์Šค, ๋Œ€ํ•œํ•ญ๊ณต(Sky-pass) ์ œํœด์นด๋“œ, ๋”ฉ๋”ฉ(DingDing)์‹ ์šฉ์นด๋“œ, ๋ ˆํฌ์ธ ์นด๋“œ, ๋งค์งํŒจ์Šค์นด๋“œ, ๋ช…์ž‘์นด๋“œ, ๋ฌธํ™”์‚ฌ๋ž‘์นด๋“œ, ๋ถ€๋น…์Šค์นด๋“œ, ๋น„์”จTOP์นด๋“œ, ์Šน์šฉ์ฐจ์š”์ผ์ œ์นด๋“œ, ์‹ ์šฉ์นด๋“œ๊ฒธ์šฉ๋งˆ์ด๋น„(Mybi)์นด๋“œ, ์•„์‹œ์•„๋‚˜ํด๋Ÿฝ์นด๋“œ(Asiana Club), ์šธ์‚ฐ๊ด‘์—ญ์‹œ ์Šน์šฉ์ฐจ์š”์ผ์ œ์นด๋“œ, ์šธ์‚ฐ์‚ฌ๋ž‘์นด๋“œ, ํ”Œ๋ž˜ํ‹ฐ๋Š„(Platinum) ์นด๋“œ, ํ•ดํ”ผ์˜คํ† ์นด๋“œ์„œ๋น„์Šค, ํ›„๋ถˆ๊ตํ†ต์นด๋“œ, BNK ํ”„๋ Œ์ฆˆ ์‹ ์šฉ์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ๋”์˜ค์ผ์นด๋“œ, ํ›„๋ถˆํ•˜์ดํŒจ์Šค์นด๋“œ, ํƒ‘๋ชจ์•„์‹ ์šฉ์นด๋“œ, ๋ฉ”๊ฐ€์‡ผํ•‘ ์‹ ์šฉ์นด๋“œ, ์˜ค๋Š˜์€e์‹ ์šฉ์นด๋“œ, ํŽซ(PET)์นด๋“œ, ๋‹ค์ด์•„๋ชฌ๋“œ(Diamond) ์นด๋“œ, ์นด๋“œํ˜• ์˜จ๋ˆ„๋ฆฌ์ƒํ’ˆ๊ถŒ, SK OIL&LPG์นด๋“œ, ํŒŸ(pod)์‹ ์šฉ์นด๋“œ, ๋ถ€์‚ฐ์ฒด์œก์‚ฌ๋ž‘์นด๋“œ, ์–ด๋””๋กœ๋“  ๊ทธ๋ฆฐ์ฒดํฌ์นด๋“œ, ZipL ์‹ ์šฉ์นด๋“œ, BNK Simple American Express Blue Business ์นด๋“œ
   - Translate these terms accurately and consistently across all languages, providing culturally appropriate explanations or context when necessary.
4. get input language and translate it inti target language.
  - return only translation. without extra explaination and comments.
  - do not return extra text.
  - Return translation only.
"""},
    {"role": "user", "content": f"{query}"}, 
]

    # Render the chat template and generate on the GPU attached by @spaces.GPU.
    tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
    outputs = model.generate(tokenized_chat, **generation_params)
    # Keep special tokens so we can split on the assistant header below.
    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=False)
    assistant_response = decoded_outputs[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
    # Bug fix: previously the <|eot_id|> marker was stripped into response_
    # but the unstripped assistant_response was returned.
    response_ = assistant_response.replace('<|eot_id|>', "")
    return response_

# Sample prompt shown in the Gradio chat UI: a Korean bank-teller dialogue to
# be translated into English.
examples = ["Translate ko to en: \n\n ์€ํ–‰์›: ์•ˆ๋…•ํ•˜์„ธ์š”! BNK์€ํ–‰์ž…๋‹ˆ๋‹ค. ๋ฌด์—‡์„ ๋„์™€๋“œ๋ฆด๊นŒ์š”? ๊ณ ๊ฐ: ์•ˆ๋…•ํ•˜์„ธ์š”. ์ œ๊ฐ€ ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ์„ ๊ฐœ์„คํ•˜๊ณ  ์‹ถ์€๋ฐ, ํ•„์š”ํ•œ ์„œ๋ฅ˜๊ฐ€ ๋ฌด์—‡์ธ์ง€ ๊ถ๊ธˆํ•ฉ๋‹ˆ๋‹ค. ์€ํ–‰์›: ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ์„ ๊ฐœ์„คํ•˜์‹œ๋ ค๋ฉด ์—ฌ๊ถŒ, ์™ธ๊ตญ์ธ ๋“ฑ๋ก์ฆ, ๊ทธ๋ฆฌ๊ณ  ์ฃผ์†Œ ์ฆ๋ช…์„œ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. ๊ณ ๊ฐ: ์•Œ๊ฒ ์Šต๋‹ˆ๋‹ค. ํ†ต์žฅ ๊ฐœ์„ค ํ›„ ์ž…๊ธˆํ•  ๋•Œ ์ˆ˜์ˆ˜๋ฃŒ๊ฐ€ ๋ฐœ์ƒํ•˜๋‚˜์š”? ์€ํ–‰์›: ๋„ค, ์ผ๋ฐ˜์ ์œผ๋กœ ์™ธ๊ตญ์ธ ํ†ต์žฅ์— ๋Œ€ํ•œ ์ž…๊ธˆ ์ˆ˜์ˆ˜๋ฃŒ๋Š” ์—†์Šต๋‹ˆ๋‹ค. ํ•˜์ง€๋งŒ ๋‹ค๋ฅธ ํ†ต์žฅ์œผ๋กœ ์ด์ฒดํ•  ๊ฒฝ์šฐ ์ˆ˜์ˆ˜๋ฃŒ๊ฐ€ ๋ฐœ์ƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ๋”์šฑ ๊ถ๊ธˆํ•œ ์ ์ด ์žˆ์œผ์‹ ๊ฐ€์š”?"]

def response(message, history):
    """Gradio ChatInterface callback: answer one chat turn.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list
        Prior conversation turns supplied by Gradio; unused — each request
        is handled statelessly by `inference`.

    Returns
    -------
    str
        The model's full reply, returned in one piece (no token streaming).
    """
    return inference(message)


gr.ChatInterface(response, examples=examples).launch()