futranbg committed on
Commit
fec6802
1 Parent(s): 6ae1c70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -3
app.py CHANGED
@@ -8,6 +8,7 @@ from huggingface_hub import Repository, InferenceClient
8
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
10
  API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"
 
11
  BOT_NAME = "LLAMA"
12
 
13
  STOP_SEQUENCES = ["\nUser:", " User:", "###", "</s>"]
@@ -25,6 +26,11 @@ client = InferenceClient(
25
  headers={"Authorization": f"Bearer {HF_TOKEN}"},
26
  )
27
 
 
 
 
 
 
28
  def format_prompt(message, history, system_prompt):
29
  prompt = ""
30
  if system_prompt:
@@ -72,9 +78,23 @@ def generate(
72
  yield output
73
  yield output
74
  except Exception as e:
75
- raise gr.Error(f"Error while generating: {e}")
76
- return output
 
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  additional_inputs=[
80
  gr.Textbox("", label="Optional system prompt"),
@@ -116,7 +136,6 @@ additional_inputs=[
116
  )
117
  ]
118
 
119
-
120
  with gr.Blocks() as demo:
121
 
122
  gr.ChatInterface(
 
8
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
10
  API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf"
11
+ API_URL_2 = "https://api-inference.huggingface.co/models/codellama/CodeLlama-34b-Instruct-hf"
12
  BOT_NAME = "LLAMA"
13
 
14
  STOP_SEQUENCES = ["\nUser:", " User:", "###", "</s>"]
 
26
  headers={"Authorization": f"Bearer {HF_TOKEN}"},
27
  )
28
 
29
+ client2 = InferenceClient(
30
+ API_URL_2,
31
+ headers={"Authorization": f"Bearer {HF_TOKEN}"},
32
+ )
33
+
34
  def format_prompt(message, history, system_prompt):
35
  prompt = ""
36
  if system_prompt:
 
78
  yield output
79
  yield output
80
  except Exception as e:
81
+ raise gr.Error(f"Client 1 error while generating: {e}")
82
+ try:
83
+ stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
84
+ output = ""
85
 
86
+ for response in stream:
87
+ output += response.token.text
88
+
89
+ for stop_str in STOP_SEQUENCES:
90
+ if output.endswith(stop_str):
91
+ output = output[:-len(stop_str)]
92
+ # output = output.rstrip()
93
+ yield output
94
+ yield output
95
+ except Exception as e:
96
+ raise gr.Error(f"Client 2 error while generating: {e}")
97
+ return output
98
 
99
  additional_inputs=[
100
  gr.Textbox("", label="Optional system prompt"),
 
136
  )
137
  ]
138
 
 
139
  with gr.Blocks() as demo:
140
 
141
  gr.ChatInterface(