try-this-model

Running

App Files Files Community

try-this-model / app.py

wxgeorge

:lock: restrict models to 15B or less.

5ae724e 4 months ago

raw

history blame

3 kB

	from openai import OpenAI
	import gradio as gr
	import os
	import json
	import functools


	# The API key is read from the environment (None when the variable is
	# unset) and the OpenAI client is pointed at the Featherless base URL.
	api_key = os.getenv('FEATHERLESS_API_KEY')
	client = OpenAI(api_key=api_key, base_url="https://api.featherless.ai/v1")

	def respond(message, history, model):
	    """Stream a chat completion for ``message`` using ``model``.

	    This is a generator: after every streamed chunk that carries text it
	    yields the whole reply accumulated so far.

	    Args:
	        message: The newest user message.
	        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
	        model: Model id passed through to the completions endpoint.

	    Yields:
	        The assistant reply accumulated up to the current chunk.
	    """
	    # Replay the earlier turns in the role/content message format, then
	    # append the new user message last.
	    history_openai_format = []
	    for human, assistant in history:
	        history_openai_format.append({"role": "user", "content": human})
	        history_openai_format.append({"role": "assistant", "content": assistant})
	    history_openai_format.append({"role": "user", "content": message})

	    response = client.chat.completions.create(
	        model=model,
	        messages=history_openai_format,
	        temperature=1.0,
	        stream=True,
	        max_tokens=2000,
	    )

	    partial_message = ""
	    for chunk in response:
	        # A streamed chunk may carry an empty ``choices`` list; indexing it
	        # would raise IndexError and abort the whole reply.
	        if not chunk.choices:
	            continue
	        piece = chunk.choices[0].delta.content
	        if piece is not None:
	            partial_message += piece
	            yield partial_message

	# SVG markup that is inlined into the page footer. Read inside a context
	# manager so the file handle is closed straight away.
	with open('./logo.svg', 'r', encoding='utf-8') as f_logo:
	    logo = f_logo.read()

	# Mapping of model class name -> list of model ids in that class.
	with open('./model-cache.json', 'r', encoding='utf-8') as f_model_cache:
	    model_cache = json.load(f_model_cache)

	def build_model_choices():
	    """Return the dropdown entries as ``(label, model_id)`` tuples.

	    Each label has the form ``"<model_id> (<model_class>)"``. Every class
	    in ``model_cache`` contributes its models except the two classes
	    listed in ``skipped_classes``.
	    """
	    skipped_classes = ('llama3-70b-8k', 'qwen2-72b-lc')
	    choices = []
	    for model_class, model_ids in model_cache.items():
	        if model_class not in skipped_classes:
	            choices.extend(
	                (f"{model_id} ({model_class})", model_id)
	                for model_id in model_ids
	            )
	    return choices

	# (label, model_id) pairs for the model dropdown, built once at import time.
	model_choices = build_model_choices()

	def initial_model(referer=None):
	    """Choose the model id the dropdown should start on.

	    Args:
	        referer: Value of the request's ``Referer`` header, or None.

	    Returns:
	        * ``'Sao10K/L3-70B-Euryale-v2.1'`` when the referer is exactly
	          ``http://127.0.0.1:7860/``;
	        * the part of the referer after ``https://huggingface.co/`` when
	          that text is a model id listed in ``model_cache``;
	        * ``'anakin87/yo-Llama-3-8B-Instruct'`` otherwise.
	    """
	    print(f"initial_model({referer})")
	    if referer == 'http://127.0.0.1:7860/':
	        return 'Sao10K/L3-70B-Euryale-v2.1'

	    # Slice by the prefix's own length so the prefix and the offset cannot
	    # drift apart.
	    hub_prefix = "https://huggingface.co/"
	    if referer and referer.startswith(hub_prefix):
	        possible_model = referer[len(hub_prefix):]
	        # NOTE(review): this looks through every class in model_cache,
	        # including the ones build_model_choices() skips - confirm that a
	        # model outside the dropdown choices is acceptable here.
	        if any(possible_model in model_ids for model_ids in model_cache.values()):
	            return possible_model

	    return 'anakin87/yo-Llama-3-8B-Instruct'

	title_text = "HuggingFace's missing inference widget"

	# The title has to be passed by keyword: per the Gradio API docs the first
	# positional parameter of gr.Blocks is `theme`, not `title`.
	with gr.Blocks(title=title_text, css='.logo-mark { fill: #ffe184; }') as demo:
	    # Page header.
	    gr.HTML("""
	        <h1 align="center">HuggingFace's missing inference widget</h1>
	        <p align="center">
	        Test any <=15B LLM from the hub.
	        </p>
	    """)

	    # `value` is given as a callable, so the starting selection is computed
	    # by initial_model() rather than fixed here; the load handler registered
	    # further down then replaces it with a choice based on the Referer.
	    model_selector = gr.Dropdown(
	        label="Model",
	        choices=model_choices,
	        value=initial_model,
	    )

	    # The selected model is handed to respond() as its third argument.
	    gr.ChatInterface(
	        respond,
	        additional_inputs=[model_selector],
	        head="""
	        <script>console.log("Hello from gradio!")</script>
	        """,
	    )

	    # Footer: the SVG logo is inlined inside the link.
	    gr.HTML(f"""
	        <p align="center">
	        Inference by <a href="https://featherless.ai">{logo}</a>
	        </p>
	    """)

	    def update_initial_model_choice(request: gr.Request):
	        """Return the starting model for this request, based on its Referer header."""
	        return initial_model(request.headers.get('referer'))

	    # Runs on page load and writes the result into the dropdown.
	    demo.load(update_initial_model_choice, outputs=model_selector)

	demo.launch()