lkm2835 committed on
Commit
5fd8b62
·
verified ·
1 Parent(s): 2f607d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -42
app.py CHANGED
@@ -1,28 +1,29 @@
1
  import os
2
- from threading import Thread
3
  from typing import Iterator
4
  import gradio as gr
5
  import torch
6
  import spaces
7
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
8
- from huggingface_hub import InferenceClient
 
 
9
 
10
 
11
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 
 
12
  MODEL = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"
13
  MAX_NEW_TOKENS = 4096
14
  DEFAULT_MAX_NEW_TOKENS = 512
15
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "16384"))
16
 
17
-
18
  DESCRIPTION = """\
19
  # <center> EXAONE 3.5: Series of Large Language Models for Real-world Use Cases </center>
20
 
21
  ##### <center> We hope EXAONE continues to advance Expert AI with its effectiveness and bilingual skills. </center>
22
 
23
  <center>👋 For more details, please check <a href=https://huggingface.co/collections/LGAI-EXAONE/exaone-35-674d0e1bb3dcd2ab6f39dbb4>EXAONE-3.5 collections</a>, <a href=https://www.lgresearch.ai/blog/view?seq=507>our blog</a> or <a href=https://arxiv.org/abs/2412.04862>technical report</a></center>
24
-
25
- #### <center> EXAONE-3.5-32B-Instruct Demo Coming Soon.. </center>
26
  """
27
 
28
 
@@ -31,7 +32,10 @@ EXAMPLES = [
31
  ["스스로를 자랑해 봐"],
32
  ]
33
  BOT_AVATAR = "EXAONE_logo.png"
34
- selected_model = gr.Radio(value="https://jps6tfdq34ydttbh.us-east4.gcp.endpoints.huggingface.cloud",visible=False)
 
 
 
35
  ADDITIONAL_INPUTS = [
36
  gr.Textbox(
37
  value="You are EXAONE model from LG AI Research, a helpful assistant.",
@@ -59,18 +63,12 @@ ADDITIONAL_INPUTS = [
59
  step=0.05,
60
  value=0.9,
61
  ),
62
- gr.Slider(
63
- label="Top-k",
64
- minimum=1,
65
- maximum=1000,
66
- step=1,
67
- value=1,
68
- ),
69
  selected_model
70
  ]
71
 
72
  tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct")
73
 
 
74
  def generate(
75
  message: str,
76
  chat_history: list[tuple[str, str]],
@@ -78,13 +76,10 @@ def generate(
78
  max_new_tokens: int = 512,
79
  temperature: float = 0.6,
80
  top_p: float = 0.9,
81
- top_k: int = 50,
82
- selected_model: str = "https://jps6tfdq34ydttbh.us-east4.gcp.endpoints.huggingface.cloud",
83
  ) -> Iterator[str]:
84
- print(f'model: {selected_model}')
85
  messages = [{"role":"system","content": system_prompt}]
86
- print(f'message: {message}')
87
- print(f'chat_history: {chat_history}')
88
  for user, assistant in chat_history:
89
  messages.extend(
90
  [
@@ -94,30 +89,34 @@ def generate(
94
  )
95
  messages.append({"role": "user", "content": message})
96
 
 
 
 
 
 
97
  input_ids = tokenizer.apply_chat_template(
98
  messages,
99
  add_generation_prompt=True,
100
  return_tensors="pt"
101
  )
102
- if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
103
- input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
104
- gr.Warning(f"Trimmed input from messages as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
105
-
106
- messages = tokenizer.decode(input_ids[0])
107
-
108
- client = InferenceClient(selected_model, token=HF_TOKEN)
109
-
110
- gen_kwargs = dict(
111
- max_new_tokens=max_new_tokens,
112
- top_p=top_p,
113
- top_k=top_k,
114
- temperature=temperature,
115
- stop=["[|endofturn|]"]
116
- )
117
 
118
- output = client.text_generation(messages, **gen_kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- return output
121
 
122
 
123
  def radio1_change(model_size):
@@ -126,8 +125,9 @@ def radio1_change(model_size):
126
 
127
  def choices_model(model_size):
128
  endpoint_url_dict = {
129
- "2.4B": "https://jps6tfdq34ydttbh.us-east4.gcp.endpoints.huggingface.cloud", # L4
130
- "7.8B": "https://wafz6im0d595g715.us-east-1.aws.endpoints.huggingface.cloud", # L40S
 
131
  }
132
  return endpoint_url_dict[model_size]
133
 
@@ -144,16 +144,16 @@ chat_interface = gr.ChatInterface(
144
  stop_btn=None,
145
  examples=EXAMPLES,
146
  cache_examples=False,
147
- )
148
 
149
 
150
  with gr.Blocks(fill_height=True) as demo:
151
- gr.Markdown("""<p align="center"><img src="https://huggingface.co/spaces/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct-Demo/resolve/main/EXAONE_Symbol%2BBI_3d.png" style="margin-right: 20px; height: 50px"/><p>""")
152
  gr.Markdown(DESCRIPTION)
153
 
154
  markdown = gr.Markdown("<center><font size=5>EXAONE-3.5-2.4B-instruct</center>")
155
  with gr.Row():
156
- model_size = ["2.4B", "7.8B"]
157
  radio1 = gr.Radio(choices=model_size, label="EXAONE-3.5-Instruct", value=model_size[0])
158
 
159
  radio1.change(radio1_change, inputs=radio1, outputs=markdown)
 
1
  import os
 
2
  from typing import Iterator
3
  import gradio as gr
4
  import torch
5
  import spaces
6
+ from transformers import AutoTokenizer
7
+ from openai import OpenAI
8
+ import json
9
+ import uuid
10
 
11
 
12
+ EXAONE_TOKEN = os.environ.get("EXAONE_TOKEN", None)
13
+ EXAONE_2_4B = os.environ.get("EXAONE_2_4B", None)
14
+ EXAONE_7_8B = os.environ.get("EXAONE_7_8B", None)
15
+ EXAONE_32B = os.environ.get("EXAONE_32B", None)
16
  MODEL = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"
17
  MAX_NEW_TOKENS = 4096
18
  DEFAULT_MAX_NEW_TOKENS = 512
19
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "16384"))
20
 
 
21
  DESCRIPTION = """\
22
  # <center> EXAONE 3.5: Series of Large Language Models for Real-world Use Cases </center>
23
 
24
  ##### <center> We hope EXAONE continues to advance Expert AI with its effectiveness and bilingual skills. </center>
25
 
26
  <center>👋 For more details, please check <a href=https://huggingface.co/collections/LGAI-EXAONE/exaone-35-674d0e1bb3dcd2ab6f39dbb4>EXAONE-3.5 collections</a>, <a href=https://www.lgresearch.ai/blog/view?seq=507>our blog</a> or <a href=https://arxiv.org/abs/2412.04862>technical report</a></center>
 
 
27
  """
28
 
29
 
 
32
  ["스스로를 자랑해 봐"],
33
  ]
34
  BOT_AVATAR = "EXAONE_logo.png"
35
+ selected_model = gr.Radio(value=["2.4B", EXAONE_2_4B],visible=False)
36
+ id_ = {"id": str(uuid.uuid4())}
37
+ model_history = {"model_history": []}
38
+
39
  ADDITIONAL_INPUTS = [
40
  gr.Textbox(
41
  value="You are EXAONE model from LG AI Research, a helpful assistant.",
 
63
  step=0.05,
64
  value=0.9,
65
  ),
 
 
 
 
 
 
 
66
  selected_model
67
  ]
68
 
69
  tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct")
70
 
71
+
72
  def generate(
73
  message: str,
74
  chat_history: list[tuple[str, str]],
 
76
  max_new_tokens: int = 512,
77
  temperature: float = 0.6,
78
  top_p: float = 0.9,
79
+ selected_model: list = ["2.4b", EXAONE_2_4B],
 
80
  ) -> Iterator[str]:
 
81
  messages = [{"role":"system","content": system_prompt}]
82
+
 
83
  for user, assistant in chat_history:
84
  messages.extend(
85
  [
 
89
  )
90
  messages.append({"role": "user", "content": message})
91
 
92
+ if not chat_history:
93
+ id_['id'] = str(uuid.uuid4())
94
+ model_history["model_history"] = []
95
+ model_history["model_history"].append(selected_model[0])
96
+
97
  input_ids = tokenizer.apply_chat_template(
98
  messages,
99
  add_generation_prompt=True,
100
  return_tensors="pt"
101
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
+ client = OpenAI(api_key=EXAONE_TOKEN, base_url="https://api.friendli.ai/dedicated/v1")
104
+ response = client.chat.completions.create(
105
+ messages=messages,
106
+ model=selected_model[1],
107
+ max_tokens=max_new_tokens,
108
+ temperature=temperature,
109
+ top_p=top_p,
110
+ stream=True,
111
+ )
112
+ outputs = ''
113
+ for r in response:
114
+ token = r.choices[0].delta.content
115
+ if token is not None:
116
+ outputs += token
117
+ yield outputs
118
 
119
+ print(json.dumps({"id": id_['id'], "messages": messages, "output": outputs, "model": model_history}, ensure_ascii=False))
120
 
121
 
122
  def radio1_change(model_size):
 
125
 
126
  def choices_model(model_size):
127
  endpoint_url_dict = {
128
+ "2.4B": ["2.4B", EXAONE_2_4B],
129
+ "7.8B": ["7.8B", EXAONE_7_8B],
130
+ "32B": ["32B", EXAONE_32B],
131
  }
132
  return endpoint_url_dict[model_size]
133
 
 
144
  stop_btn=None,
145
  examples=EXAMPLES,
146
  cache_examples=False,
147
+ )
148
 
149
 
150
  with gr.Blocks(fill_height=True) as demo:
151
+ gr.Markdown("""<p align="center"><img src="https://huggingface.co/spaces/LGAI-EXAONE/EXAONE-3.5-Instruct-Demo/resolve/main/EXAONE_Symbol%2BBI_3d.png" style="margin-right: 20px; height: 50px"/><p>""")
152
  gr.Markdown(DESCRIPTION)
153
 
154
  markdown = gr.Markdown("<center><font size=5>EXAONE-3.5-2.4B-instruct</center>")
155
  with gr.Row():
156
+ model_size = ["2.4B", "7.8B", "32B"]
157
  radio1 = gr.Radio(choices=model_size, label="EXAONE-3.5-Instruct", value=model_size[0])
158
 
159
  radio1.change(radio1_change, inputs=radio1, outputs=markdown)