Spaces: bigcode/bigcode-playground (status: Runtime error)
Update app.py
app.py CHANGED
@@ -11,6 +11,8 @@ from share_btn import community_icon_html, loading_icon_html, share_js, share_bt
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 API_URL = os.environ.get("API_URL")
 
+with open("https://huggingface.co/spaces/bigcode/bigcode-playground/raw/main/HHH_prompt.txt", "r") as f:
+    HHH_PROMPT = f.read() + "\n\n"
 
 FIM_PREFIX = "<fim_prefix>"
 FIM_MIDDLE = "<fim_middle>"
@@ -22,6 +24,9 @@ FORMATS = """## Model formats
 
 The model is pretrained on code and in addition to the pure code data it is formatted with special tokens. E.g. prefixes specifying the source of the file or special tokens separating code from a commit message. See below:
 
+### Chat mode
+Chat mode prepends the [HHH prompt](https://gist.github.com/jareddk/2509330f8ef3d787fc5aaac67aab5f11#file-hhh_prompt-txt) from Anthropic to the request which conditions the model to be an assistant.
+
 ### Prefixes
 Any combination of the three following prefixes can be found in pure code files:
 
@@ -64,11 +69,10 @@ theme = gr.themes.Monochrome(
 )
 
 client = Client(
-    API_URL,
-    #headers={"Authorization": f"Bearer {HF_TOKEN}"},
+    API_URL, #headers={"Authorization": f"Bearer {HF_TOKEN}"},
 )
 
-def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
+def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, chat_mode=False):
 
     temperature = float(temperature)
     if temperature < 1e-2:
@@ -85,18 +89,27 @@ def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition
         seed=42,
     )
 
+    if chat_mode and FIM_INDICATOR in prompt:
+        raise ValueError("Chat mode and FIM are mutually exclusive. Choose one or the other.")
+
+    if chat_mode:
+        chat_prompt = "Human: " + prompt + "\n\nAssistant:"
+        prompt = HHH_PROMPT + chat_prompt
+
     if FIM_INDICATOR in prompt:
         fim_mode = True
         try:
             prefix, suffix = prompt.split(FIM_INDICATOR)
         except:
-            ValueError(f"Only one {FIM_INDICATOR} allowed in prompt!")
+            raise ValueError(f"Only one {FIM_INDICATOR} allowed in prompt!")
         prompt = f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}"
 
     stream = client.generate_stream(prompt, **generate_kwargs)
 
     if fim_mode:
         output = prefix
+    elif chat_mode:
+        output = chat_prompt
     else:
         output = prompt
 
@@ -155,7 +168,11 @@ _Note:_ this is an internal playground - please do not share. The deployment can
         gr.Markdown(FORMATS)
 
         with gr.Column(scale=1):
-
+            chat_mode = gr.Checkbox(
+                value=False,
+                label="Chat mode",
+                info="Uses Anthropic's HHH prompt to turn the model into an assistant."
+            )
             temperature = gr.Slider(
                 label="Temperature",
                 value=0.2,
@@ -193,7 +210,7 @@ _Note:_ this is an internal playground - please do not share. The deployment can
                 info="Penalize repeated tokens",
             )
 
-    submit.click(generate, inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty], outputs=[output])
-    instruction.submit(generate, inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty], outputs=[output])
+    submit.click(generate, inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, chat_mode], outputs=[output])
+    # instruction.submit(generate, inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, chat_mode], outputs=[output])
     share_button.click(None, [], [], _js=share_js)
 demo.queue(concurrency_count=16).launch(debug=True)
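A note on the new HHH_PROMPT load: Python's built-in open() accepts only filesystem paths, not HTTP URLs, so reading HHH_prompt.txt directly from the https://huggingface.co/... address as written would raise at import time. A minimal sketch of fetching the file over HTTP instead (not part of this commit; it assumes the requests package is available in the Space):

import requests

HHH_PROMPT_URL = "https://huggingface.co/spaces/bigcode/bigcode-playground/raw/main/HHH_prompt.txt"

# Download the HHH prompt once at startup instead of passing a URL to open().
response = requests.get(HHH_PROMPT_URL, timeout=10)
response.raise_for_status()
HHH_PROMPT = response.text + "\n\n"

Alternatively, committing HHH_prompt.txt alongside app.py in the Space repo and reading it with a plain local open("HHH_prompt.txt") would avoid the network call entirely.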