# klimbr-demo / app.py
# wxgeorge's picture
# :wrench: jack up default klimbr rate.
# a920174
from openai import OpenAI
import gradio as gr
import os
import json
import html
import random
import datetime
# The OpenAI client is pointed at the Featherless API base URL. The key comes
# from the FEATHERLESS_API_KEY environment variable (None when it is unset).
api_key = os.environ.get('FEATHERLESS_API_KEY')
client = OpenAI(api_key=api_key, base_url="https://api.featherless.ai/v1")
# from https://github.com/av/klmbr/blob/ca2967123d171fc6d91c329c40e5050a86088446/klmbr/main.py
# I sure wish I could import this, but can't figure out how to make HF spaces run this as a module
# and not a file.
import random
# Names of the per-character perturbations klimbr_randomize picks from.
mods = [
    "capitalize",
    "diacritic",
    "leetspeak",
    "remove_vowel",
]

# Combining marks appended to a letter by the "diacritic" modification.
_DIACRITICS = ["̀", "́", "̂", "̃", "̈", "̄", "̆", "̇", "̊", "̋"]

# Lower-case letter -> digit substitutions used by the "leetspeak" modification.
_LEETSPEAK_MAP = {
    "a": "4", "e": "3", "i": "1", "o": "0", "s": "5",
    "t": "7", "b": "8", "g": "9", "l": "1",
}


def _word_spans(text):
    """Return (start, end) index pairs of every whitespace-delimited word in *text*."""
    spans = []
    start = None
    for pos, char in enumerate(text):
        if char.isspace():
            if start is not None:
                spans.append((start, pos))
                start = None
        elif start is None:
            start = pos
    if start is not None:
        spans.append((start, len(text)))
    return spans


def klimbr_randomize(text, percentage):
    """Randomly perturb a share of the characters of *text*.

    Args:
        text: The string to perturb.
        percentage: Share of characters to touch, on a 0-100 scale.

    Returns:
        A ``(modified_text, word_mapping)`` tuple. ``word_mapping`` maps each
        modified word (trailing ``.``/``,`` stripped) to its original form.
        Empty *text* yields ``("", {})``; a percentage of 0 returns *text*
        unchanged with an empty mapping.

    Raises:
        ValueError: If *percentage* is outside the 0-100 range.
    """
    if not text:
        return "", {}  # Return empty string and empty mapping if input is empty
    if not 0 <= percentage <= 100:
        raise ValueError("Percentage must be between 0 and 100")
    if percentage == 0:
        # 0% means "leave the text alone"; the max(1, ...) below would
        # otherwise still force one modification.
        return text, {}

    chars = list(text)

    # Map every character index to the span of the word containing it (None
    # for whitespace). Using the real offsets keeps the lookup correct when
    # words are separated by more than one whitespace character or the text
    # starts with whitespace.
    span_at = [None] * len(chars)
    for span in _word_spans(text):
        for pos in range(*span):
            span_at[pos] = span

    num_chars_to_modify = max(1, int(len(chars) * (percentage / 100)))
    indices_to_modify = random.sample(range(len(chars)), num_chars_to_modify)

    word_mapping = {}
    for idx in indices_to_modify:
        modification = random.choice(mods)

        span = span_at[idx]
        if span is None:
            # Whitespace is never modified.
            continue
        word_start, word_end = span
        original_word = text[word_start:word_end]
        char = chars[idx]

        if modification == "capitalize":
            chars[idx] = char.swapcase()
        elif modification == "diacritic":
            if char.isalpha():
                chars[idx] = char + random.choice(_DIACRITICS)
        elif modification == "leetspeak":
            chars[idx] = _LEETSPEAK_MAP.get(char.lower(), char)
        elif modification == "remove_vowel":
            if char.lower() in "aeiou":
                chars[idx] = ""

        # Each list slot still corresponds to one original character, so the
        # word's slice boundaries are unaffected by earlier edits.
        modified_word = "".join(chars[word_start:word_end])
        if modified_word != original_word:
            # Clean up both the modified word and the original word
            word_mapping[modified_word.rstrip('.,')] = original_word.rstrip('.,')

    return "".join(chars), word_mapping
## end of klimbr inclusion
# Maps (message, percentage, extra) -> klimbr-randomized message text.
# NOTE(review): apart from the `last` case below nothing is ever evicted, so
# this dict grows with every distinct key for the life of the process.
klimbr_cache = {}


def memoized_klimbr(message, percentage, extra, last=False):
    """Return the randomized form of *message*, reusing a cached result.

    The cache key is ``(message, percentage, extra)``. When *last* is true any
    cached entry for that key is discarded first, so a fresh randomization is
    produced and stored.
    """
    cache_key = (message, percentage, extra)

    # _always_ re-randomize the last message
    if last:
        klimbr_cache.pop(cache_key, None)

    if cache_key not in klimbr_cache:
        randomized = klimbr_randomize(message, percentage)
        klimbr_cache[cache_key] = randomized[0]

    return klimbr_cache[cache_key]
# Whether each translated message is printed. Environment variables are always
# strings, so the value is parsed explicitly: unset means enabled, and an empty
# value or "0"/"false"/"no"/"off" (case-insensitive) disables logging.
LOG_TRANSLATIONS = os.environ.get('LOG_TRANSLATIONS', 'true').strip().lower() not in (
    '', '0', 'false', 'no', 'off',
)
def klimberize_conversation(message, history, percentage, log=LOG_TRANSLATIONS):
    """Randomize the user side of a conversation.

    Args:
        message: The newest user message.
        history: Sequence of ``(human, assistant)`` pairs from earlier turns.
        percentage: Passed through to ``memoized_klimbr``.
        log: When truthy, print every original/translated user message pair.

    Returns:
        ``(klimbred_message, klimbred_history)``; assistant replies in the
        history are passed through untouched.
    """
    # we memoize the klimbrization of strings.
    # this is to work with the gradio chat interface model
    # so that messages are not _re_-randomized at each conversation turn
    klimbred_history = []
    for turn_index, (human, assistant) in enumerate(history):
        klimbred_human = memoized_klimbr(human, percentage, turn_index)
        klimbred_history.append((klimbred_human, assistant))

    klimbred_message = memoized_klimbr(message, percentage, len(history), last=True)

    if log:
        originals = [human for human, _ in history] + [message]
        translations = [human for human, _ in klimbred_history] + [klimbred_message]
        for original, kbed in zip(originals, translations):
            print(f"Translated '{original}' as '{kbed}'")

    return (klimbred_message, klimbred_history)
def respond(message, history, model, klimbr_percentage):
    """Stream a chat completion for the klimbr-randomized conversation.

    Args:
        message: The newest user message.
        history: Sequence of ``(human, assistant)`` pairs from earlier turns.
        model: Model id forwarded to the chat completions API.
        klimbr_percentage: Forwarded to ``klimberize_conversation``.

    Yields:
        The HTML-escaped response accumulated so far, once per streamed chunk
        that carries content.
    """
    message, history = klimberize_conversation(message, history, klimbr_percentage)

    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model,
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
        max_tokens=2000,
        extra_headers={
            'HTTP-Referer': 'https://huggingface.co./spaces/featherless-ai/klimbr-demo',
            'X-Title': "Klimbr demo space"
        }
    )

    partial_message = ""
    for chunk in response:
        # A streamed chunk may carry no choices at all; indexing [0] on it
        # would raise IndexError and abort the whole response.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            partial_message += html.escape(content)
            yield partial_message
# Contents of logo.svg, read once at import time and embedded in the page
# footer. The context manager closes the file handle instead of leaving it
# to the garbage collector.
with open('./logo.svg', encoding='utf-8') as logo_file:
    logo = logo_file.read()
# we chose a few models across the smaller model classes to give a sense of the technique
# Keys are model-class names; values are the model ids offered for that class.
MODEL_CHOICES = {
    "llama2-13b-4k": [
        "NousResearch/Nous-Hermes-Llama2-13b",
    ],
    "llama3-8b-8k": [
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "NousResearch/Hermes-2-Theta-Llama-3-8B",
        "aaditya/Llama3-OpenBioLLM-8B",
        "elyza/Llama-3-ELYZA-JP-8B",
        "mlabonne/NeuralDaredevil-8B-abliterated",
    ],
    "llama31-8b-16k": [
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "shenzhi-wang/Llama3.1-8B-Chinese-Chat",
        "AXCXEPT/Llama-3.1-8B-EZO-1.1-it",
        "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated",
        "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct",
    ],
    "mistral-v02-7b-lc": [
        "HuggingFaceH4/zephyr-7b-beta",
        "mlabonne/NeuralDaredevil-7B",
        "HuggingFaceH4/zephyr-7b-alpha",
    ],
    "mistral-nemo-12b-lc": [
        "mistralai/Mistral-Nemo-Instruct-2407",
    ],
    "rwvk-14b-lc": [
        "m8than/apple-rwkv-1-c-14b",
    ],
}


def build_model_choices():
    """Flatten MODEL_CHOICES into ``(label, model_id)`` pairs.

    Each label has the form ``"<model_id> (<model_class_name>)"``; pairs keep
    the order in which classes and models appear in MODEL_CHOICES.
    """
    return [
        (f"{model_id} ({model_class_name})", model_id)
        for model_class_name, model_ids in MODEL_CHOICES.items()
        for model_id in model_ids
    ]


model_choices = build_model_choices()
def initial_model(referer=None):
    """Return the model id that is preselected in the model dropdown.

    *referer* is accepted but not consulted; the same id is returned for
    every call.
    """
    # A disabled alternative picked a random but different model each day:
    # seed random.Random with the RANDOM_SEED environment variable (default
    # 'kcOtfNHA+e') plus today's date, then choose from model_choices.
    default_model_id = "mistralai/Mistral-Nemo-Instruct-2407"
    return default_model_id
# Title string for the demo page.
title_text = "Klimbr token input pre-processor demo space"

# Upstream project implementing the klimbr technique; linked from the page.
klimbr_url = "https://github.com/av/klmbr"

# Stylesheet handed to gr.Blocks.
# NOTE(review): `.h1` is a class selector, and no markup in this file carries
# class "h1" - confirm whether the element selector `h1` was intended.
css = """
.logo-mark { fill: #ffe184; }
/* from https://github.com/gradio-app/gradio/issues/4001
* necessary as putting ChatInterface in gr.Blocks changes behaviour
*/
.contain { display: flex; flex-direction: column; }
.gradio-container { height: 100vh !important; }
#component-0 { height: 100%; }
#chatbot { flex-grow: 1; overflow: auto;}
.lead-text {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
padding: 20px;
box-sizing: border-box;
}
.content {
max-width: 60vh;
text-align: center;
font-size: 15pt;
}
.h1 {
margin-bottom: 20px;
}
"""
# Page layout: lead-in text, randomization slider, model picker with a
# "model card" button, the chat interface itself, and a footer with the logo.
# The title is passed by keyword: the first positional parameter of gr.Blocks
# is not the title, so a positional string never reaches it.
with gr.Blocks(title=title_text, css=css) as demo:
    gr.HTML(f"""
    <div class="lead-text">
    <h1 align="center"><a href="{klimbr_url}">Klimbr</a> demo space</h1>
    <div class="content">
    <p>
    Klimbr is a technique to increase entropy in LLM outputs
    by adding entropy to the input prompt prior to inference.
    </p>
    <p>
    For details on the technique see <a href="{klimbr_url}">the klimbr github</a>
    or the source code of this space.
    </p>
    </div>
    </div>
    """)

    # hidden_state = gr.State(value=initial_model)

    # klimbr_randomize takes its percentage on a 0-100 scale (it divides by
    # 100 and rejects values above 100), so the slider uses the same scale;
    # a 0-1 slider would make the default mean 0.65% instead of 65%.
    percentage = gr.Slider(
        minimum=0,
        maximum=100,
        value=65,
        label="Percentage of input text to randomize"
    )

    with gr.Row():
        model_selector = gr.Dropdown(
            label="Select your Model",
            choices=model_choices,
            value=initial_model,
            # value=hidden_state,
            scale=4
        )
        # Purely client-side handler: opens the selected model's page in a new tab.
        # NOTE(review): the host is written with a trailing dot
        # ("huggingface.co."); kept as-is - confirm it is intentional.
        gr.Button(
            value="Visit Model Card ↗️",
            scale=1
        ).click(
            inputs=[model_selector],
            js="(model_selection) => { window.open(`https://huggingface.co./${model_selection}`, '_blank') }",
            fn=None,
        )

    # The model and percentage widgets are forwarded to respond() as its
    # third and fourth arguments. The stray leading "," that used to open the
    # `head` markup has been dropped.
    gr.ChatInterface(
        respond,
        additional_inputs=[model_selector, percentage],
        head="""
<script>console.log("Hello from gradio!")</script>
""",
        concurrency_limit=5
    )

    gr.HTML(f"""
    <p align="center">
    Inference by <a href="https://featherless.ai">{logo}</a>
    </p>
    """)

    def update_initial_model_choice(request: gr.Request):
        """Pick the dropdown's initial model, passing along the request's referer header."""
        return initial_model(request.headers.get('referer'))

    # Set the dropdown value when the page loads.
    demo.load(update_initial_model_choice, outputs=model_selector)

demo.launch()