|
import os |
|
from typing import Iterator |
|
import gradio as gr |
|
import torch |
|
import spaces |
|
from transformers import AutoTokenizer |
|
from openai import OpenAI |
|
import json |
|
import uuid |
|
|
|
|
|
# API credential and per-size model identifiers are read from the environment;
# each one is None when the corresponding variable is not set.
EXAONE_TOKEN = os.getenv("EXAONE_TOKEN")
EXAONE_2_4B = os.getenv("EXAONE_2_4B")
EXAONE_7_8B = os.getenv("EXAONE_7_8B")
EXAONE_32B = os.getenv("EXAONE_32B")

# Branding shown in the "powered by" banner.
# NOTE(review): the logo host is spelled "huggingface.co." (trailing dot) -
# confirm that this is intentional.
FRIENDLIAI = "https://friendli.ai"
FRIENDLIAI_LOGO = "https://huggingface.co./spaces/LGAI-EXAONE/EXAONE-3.5-Instruct-Demo/resolve/main/friendliai-logo.png"

# Hugging Face model id.
MODEL = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"

# Upper bound and initial position of the "Max new tokens" slider.
MAX_NEW_TOKENS = 4096
DEFAULT_MAX_NEW_TOKENS = 512

# Prompt-length budget; overridable through the environment, 16384 by default.
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "16384"))
|
|
|
# Markdown/HTML header rendered at the top of the demo page.
# - href values are quoted: an unquoted attribute value may not contain "=",
#   which the blog URL ("view?seq=507") does.
# - NOTE(review): the glyph before "For more details" had been mis-decoded
#   (a lone Greek pi); it is restored here as the pointing-hand emoji - confirm.
DESCRIPTION = """\
<h1 style="text-align: center; margin-top: -23px; margin-bottom: -5px;"> EXAONE 3.5: Series of Large Language Models for Real-world Use Cases</h1>

#### <center> We hope EXAONE continues to advance Expert AI with its effectiveness and bilingual skills. </center>

<center>👉 For more details, please check <a href="https://huggingface.co./collections/LGAI-EXAONE/exaone-35-674d0e1bb3dcd2ab6f39dbb4">EXAONE-3.5 collections</a>, <a href="https://www.lgresearch.ai/blog/view?seq=507">our blog</a> or <a href="https://arxiv.org/abs/2412.04862">technical report</a></center>
"""
|
|
|
|
|
# Example prompts offered under the chat box: one English, one Korean.
# The Korean prompt ("Brag about yourself") had been stored as mojibake
# (UTF-8 bytes decoded with a Greek single-byte codepage); it is restored here.
EXAMPLES = [
    ["Explain how wonderful you are"],
    ["스스로를 자랑해 봐"],
]
|
# Avatar image used for the assistant side of the chat window.
BOT_AVATAR = "EXAONE_logo.png"

# Invisible component that carries the active [size label, model identifier]
# pair; it is listed among the chat interface's additional inputs.
# NOTE(review): a Radio without `choices` is used purely as a value holder -
# confirm the installed Gradio version accepts a value outside its choices.
selected_model = gr.Radio(
    value=["2.4B", EXAONE_2_4B],
    visible=False,
)

# Module-level logging state: the current conversation id and the list of
# model labels used so far. Both are mutated by `generate`.
id_ = dict(id=str(uuid.uuid4()))
model_history = dict(model_history=[])
|
|
|
# Extra controls handed to the chat interface. They are listed in the same
# order as the parameters of `generate` that follow (message, chat_history).
_system_prompt_box = gr.Textbox(
    label="System Prompt",
    value="You are EXAONE model from LG AI Research, a helpful assistant.",
    render=False,
)
_max_new_tokens_slider = gr.Slider(
    minimum=1,
    maximum=MAX_NEW_TOKENS,
    value=DEFAULT_MAX_NEW_TOKENS,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=2.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.05,
    maximum=1.0,
    value=0.9,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

ADDITIONAL_INPUTS = [
    _system_prompt_box,
    _max_new_tokens_slider,
    _temperature_slider,
    _top_p_slider,
    selected_model,
]
|
|
|
# Tokenizer for the model named by MODEL, loaded once at import time.
# The MODEL constant is used instead of repeating the identifier literally.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
|
|
|
|
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str,
    max_new_tokens: int = 512,
    temperature: float = 0.6,
    top_p: float = 0.9,
    selected_model: list = ["2.4b", EXAONE_2_4B],  # read-only; never mutated here
) -> Iterator[str]:
    """Stream an assistant reply for ``message`` from the selected endpoint.

    Args:
        message: The newest user message.
        chat_history: Earlier ``(user, assistant)`` turns of this conversation.
        system_prompt: Content of the leading system message.
        max_new_tokens: Forwarded to the API as ``max_tokens``.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.
        selected_model: ``[size label, model identifier]``; the label is
            recorded for logging and the identifier is sent as ``model``.

    Yields:
        The reply accumulated so far, once per received content token.

    Side effects:
        Updates the module-level ``id_`` / ``model_history`` dicts and, once
        the stream is exhausted, prints one JSON log line to stdout.
    """
    # Rebuild the whole conversation in chat-completions message format.
    messages = [{"role": "system", "content": system_prompt}]
    for user, assistant in chat_history:
        messages.extend(
            [
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant},
            ]
        )
    messages.append({"role": "user", "content": message})

    # An empty history marks the start of a new conversation: issue a fresh
    # id and reset the list of models used.
    # NOTE(review): this state is module-level, so concurrent conversations
    # share (and overwrite) it - confirm that this is acceptable for logging.
    if not chat_history:
        id_["id"] = str(uuid.uuid4())
        model_history["model_history"] = []
    model_history["model_history"].append(selected_model[0])

    # The structured messages are sent as-is; nothing is tokenized locally.
    client = OpenAI(api_key=EXAONE_TOKEN, base_url="https://api.friendli.ai/dedicated/v1")
    response = client.chat.completions.create(
        messages=messages,
        model=selected_model[1],
        max_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )

    outputs = ""
    for chunk in response:
        # A streamed chunk may carry an empty `choices` list (for example a
        # trailing usage record); skip it rather than raise IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token is not None:
            outputs += token
            yield outputs

    # One JSON record per completed reply, keeping non-ASCII text readable.
    print(
        json.dumps(
            {"id": id_["id"], "messages": messages, "output": outputs, "model": model_history},
            ensure_ascii=False,
        )
    )
|
|
|
|
|
def radio1_change(model_size):
    """Return the "EXAONE-3.5-<size>-instruct powered by FriendliAI" banner HTML.

    Args:
        model_size: Size label interpolated into the model name, e.g. "7.8B".

    Returns:
        The banner markup as a string, starting and ending with a newline.
    """
    banner_lines = [
        "",  # leading newline
        '<div style="display: flex; width: 450px; margin-left: 535px; font-size: 20px;">',
        f'<span style="margin-top: 6px; margin-right: -2px">EXAONE-3.5-{model_size}-instruct </span>',
        '<span style="margin-top: 10px; margin-left: 7px; font-size: 16px;">powered by</span>',
        f'<a href={FRIENDLIAI}><img src={FRIENDLIAI_LOGO} style="margin-left: -4px; height: 41px;"/></a>',
        "</div>",
        "",  # trailing newline
    ]
    return "\n".join(banner_lines)
|
|
|
def choices_model(model_size):
    """Map a size label to its ``[label, model identifier]`` pair.

    Args:
        model_size: One of "2.4B", "7.8B" or "32B".

    Returns:
        A two-element list: the label and the matching ``EXAONE_*`` value.

    Raises:
        KeyError: If ``model_size`` is not one of the known labels.
    """
    labels = ("2.4B", "7.8B", "32B")
    identifiers = (EXAONE_2_4B, EXAONE_7_8B, EXAONE_32B)
    pairs_by_label = {label: [label, ident] for label, ident in zip(labels, identifiers)}
    return pairs_by_label[model_size]
|
|
|
|
|
# Chat window: bubble layout, with the EXAONE avatar on the assistant side only.
_chat_window = gr.Chatbot(
    label="EXAONE-3.5-Instruct",
    avatar_images=[None, BOT_AVATAR],
    layout="bubble",
    bubble_full_width=False,
)

# Chat UI driven by `generate`. Example outputs are not cached and no stop
# button is configured.
chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=_chat_window,
    additional_inputs=ADDITIONAL_INPUTS,
    examples=EXAMPLES,
    cache_examples=False,
    stop_btn=None,
)
|
|
|
|
|
# Page layout: logo, description, "powered by" banner, model-size selector, chat.
with gr.Blocks(fill_height=True) as demo:
    # Centered logo; the paragraph is closed with </p> (it previously ended
    # with a second opening <p>).
    gr.Markdown(
        """<p align="center"><img src="https://huggingface.co./spaces/LGAI-EXAONE/EXAONE-3.5-Instruct-Demo/resolve/main/EXAONE_Symbol%2BBI_3d.png" style="margin-right: 20px; height: 50px"/></p>"""
    )
    gr.Markdown(DESCRIPTION)

    model_size = ["2.4B", "7.8B", "32B"]

    # The initial banner is produced by the same helper the radio callback
    # uses, so the two renderings cannot drift apart.
    markdown = gr.Markdown(radio1_change(model_size[0]))

    with gr.Row():
        radio1 = gr.Radio(choices=model_size, label="EXAONE-3.5-Instruct", value=model_size[0])

    # Changing the size refreshes the banner and swaps the hidden
    # [label, identifier] pair that is passed to `generate`.
    radio1.change(radio1_change, inputs=radio1, outputs=markdown)
    radio1.change(choices_model, inputs=radio1, outputs=selected_model)

    chat_interface.render()
|
|
|
|
|
if __name__ == "__main__":
    # Enable the request queue with max_size=25, then start the app.
    demo.queue(max_size=25)
    demo.launch()