Sakalti committed on
Commit
5a64991
·
verified ·
1 Parent(s): 8edf56a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -17
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import time
 
4
 
5
  client = InferenceClient("Qwen/Qwen2.5-3b-Instruct")
6
 
@@ -10,11 +11,11 @@ def respond(
10
  system_message,
11
  max_tokens,
12
  temperature,
13
- top_p,
14
- progress=gr.Progress() # 進捗表示用
15
  ):
16
  messages = [{"role": "system", "content": system_message}]
17
-
 
18
  for val in history:
19
  if val[0]:
20
  messages.append({"role": "user", "content": val[0]})
@@ -23,22 +24,39 @@ def respond(
23
 
24
  messages.append({"role": "user", "content": message})
25
 
26
- # AI応答時間計測開始
27
- start_time = time.time()
28
- response = client.chat_completion(
29
- messages,
30
- max_tokens=max_tokens,
31
- temperature=temperature,
32
- top_p=top_p,
33
- )
34
- elapsed_time = time.time() - start_time # AI応答時間計測終了
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- # ユーザーに進捗を表示
37
- progress(0, f"応答中... {elapsed_time:.2f}秒") # 初期応答時間表示
38
- time.sleep(0.5) # 応答中に少し待機
39
- total_response_time = elapsed_time + 0.5 # 総応答時間を計算
40
 
41
- return response.choices[0].message.content, f"予測時間: {elapsed_time:.2f}秒 / 総応答時間: {total_response_time:.2f}秒"
42
 
43
  demo = gr.ChatInterface(
44
  respond,
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import time
4
+ import threading
5
 
6
  client = InferenceClient("Qwen/Qwen2.5-3b-Instruct")
7
 
 
11
  system_message,
12
  max_tokens,
13
  temperature,
14
+ top_p
 
15
  ):
16
  messages = [{"role": "system", "content": system_message}]
17
+
18
+ # メッセージ履歴を追加
19
  for val in history:
20
  if val[0]:
21
  messages.append({"role": "user", "content": val[0]})
 
24
 
25
  messages.append({"role": "user", "content": message})
26
 
27
+ # 応答生成の別スレッド処理
28
+ def ai_response():
29
+ nonlocal response_content
30
+ start_time = time.time()
31
+ response = client.chat_completion(
32
+ messages,
33
+ max_tokens=max_tokens,
34
+ temperature=temperature,
35
+ top_p=top_p,
36
+ )
37
+ elapsed_time = time.time() - start_time # 応答時間計測
38
+ response_content = response.choices[0].message.content
39
+ response_time = f"応答にかかった時間: {elapsed_time:.2f}秒"
40
+ return response_content, response_time
41
+
42
+ # 応答時間を表示するためのスレッド
43
+ response_content = "応答生成中です..."
44
+ thread = threading.Thread(target=ai_response)
45
+ thread.start()
46
+
47
+ # 応答を返すまでの間、経過時間を更新
48
+ elapsed_time_display = ""
49
+ elapsed_time = 0
50
+ while thread.is_alive():
51
+ elapsed_time += 1
52
+ elapsed_time_display = f"{elapsed_time}秒経過..."
53
+ time.sleep(1)
54
+ if not thread.is_alive():
55
+ break
56
 
57
+ thread.join() # スレッド終了を待機
 
 
 
58
 
59
+ return response_content, elapsed_time_display
60
 
61
  demo = gr.ChatInterface(
62
  respond,