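# try-this-model / app.py
# Web file-viewer header captured together with the source (not part of the program):
#   wxgeorge's picture | ":lock: restrict models to 15B or less." | 5ae724e | raw | history blame | 3 kB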
from openai import OpenAI
import gradio as gr
import os
import json
import functools
# The stock OpenAI client is pointed at the Featherless endpoint; the API key
# comes from the FEATHERLESS_API_KEY environment variable (None when unset).
api_key = os.getenv('FEATHERLESS_API_KEY')
client = OpenAI(api_key=api_key, base_url="https://api.featherless.ai/v1")
def respond(message, history, model):
    """Stream the assistant's reply to ``message`` from the selected model.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        model: Model id forwarded to the chat-completions endpoint.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        content, so the caller can render the answer incrementally.
    """
    # Flatten the (user, assistant) pairs into the role-tagged message list
    # expected by the chat-completions API, then append the new user turn.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model,
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
        max_tokens=2000,
    )

    partial_message = ""
    for chunk in response:
        # A streamed chunk may arrive with an empty `choices` list (e.g. a
        # usage-only or keep-alive chunk from an OpenAI-compatible server);
        # indexing [0] on it would raise IndexError and abort the reply.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            partial_message = partial_message + piece
            yield partial_message
# SVG markup of the logo, read once at import time. A context manager is used
# so the file handle is closed deterministically instead of being leaked.
with open('./logo.svg') as f_logo:
    logo = f_logo.read()

# model-cache.json is consumed elsewhere in this file as a mapping of
# model-class name -> list of model ids.
with open('./model-cache.json', 'r') as f_model_cache:
    model_cache = json.load(f_model_cache)
def build_model_choices():
    """Return the ``(label, value)`` pairs for every selectable model.

    Walks ``model_cache`` class by class, skipping the 'llama3-70b-8k' and
    'qwen2-72b-lc' classes. Each remaining model id yields one pair whose
    label is ``"<model_id> (<model_class>)"`` and whose value is the bare
    model id.
    """
    excluded_classes = ('llama3-70b-8k', 'qwen2-72b-lc')
    return [
        (f"{model_id} ({model_class})", model_id)
        for model_class in model_cache
        if model_class not in excluded_classes
        for model_id in model_cache[model_class]
    ]
# (label, value) pairs for every selectable model, computed once at import time.
model_choices = build_model_choices()
def initial_model(referer=None):
    """Pick the model that should be pre-selected for a visitor.

    Args:
        referer: Value of the HTTP ``Referer`` header, or ``None`` when it is
            unavailable.

    Returns:
        * a fixed model id when the referer is the local development URL;
        * the model id taken from a Hugging Face referer URL, if that id is
          present anywhere in ``model_cache``;
        * otherwise the default 'anakin87/yo-Llama-3-8B-Instruct'.

    NOTE(review): the membership check spans every class in ``model_cache``,
    including the classes that ``build_model_choices`` leaves out of the
    dropdown, and the local-development default looks like a 70B model --
    confirm whether either can yield a value that is not a dropdown choice.
    """
    print(f"initial_model({referer})")

    if referer == 'http://127.0.0.1:7860/':
        return 'Sao10K/L3-70B-Euryale-v2.1'

    if referer:
        # The original tested for the 24-character "https://huggingface.co./"
        # prefix but sliced off only 23 characters, so a normal referer never
        # matched and the dotted form kept a leading "/". Accept both host
        # spellings and strip exactly the prefix that matched.
        for prefix in ("https://huggingface.co/", "https://huggingface.co./"):
            if referer.startswith(prefix):
                possible_model = referer[len(prefix):]
                # Test each class's id list directly instead of concatenating
                # all of them into one throwaway list first.
                if any(possible_model in model_ids
                       for model_ids in model_cache.values()):
                    return possible_model
                break

    return 'anakin87/yo-Llama-3-8B-Instruct'
# Browser/page title for the app.
title_text = "HuggingFace's missing inference widget"

# `title` has to be passed by keyword: the first positional parameter of
# gr.Blocks is `theme`, so a positional title is interpreted as a theme name
# and the page title is never set.
with gr.Blocks(title=title_text, css='.logo-mark { fill: #ffe184; }') as demo:
    gr.HTML("""
<h1 align="center">HuggingFace's missing inference widget</h1>
<p align="center">
Test any <=15B LLM from the hub.
</p>
""")

    # `value` is given as a callable, so the default is computed by calling
    # initial_model() rather than being fixed at build time. The choices reuse
    # the list already built at import instead of rebuilding it.
    model_selector = gr.Dropdown(
        label="Model",
        choices=model_choices,
        value=initial_model
    )

    # The selected model is forwarded to respond() as its third argument.
    # The head snippet previously began with a stray "," before the <script>
    # tag; that character is removed.
    gr.ChatInterface(
        respond,
        additional_inputs=[model_selector],
        head="""
<script>console.log("Hello from gradio!")</script>
""",
    )

    # Footer: the SVG logo is inlined inside the link.
    gr.HTML(f"""
<p align="center">
Inference by <a href="https://featherless.ai">{logo}</a>
</p>
""")

    def update_initial_model_choice(request: gr.Request):
        """Choose the pre-selected model from the request's Referer header."""
        return initial_model(request.headers.get('referer'))

    # On page load, overwrite the dropdown value with the referer-based choice.
    demo.load(update_initial_model_choice, outputs=model_selector)

demo.launch()