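"""A multi-channel chatbot UI for LLaMA2-70B-Chat built with Gradio.

The model is served through the Hugging Face Inference API (with Text
Generation Inference as the serving framework); chat histories are persisted
in the browser's local storage.
"""
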
import os
import re
import json
import copy

import gradio as gr

from llama2 import GradioLLaMA2ChatPPManager
from llama2 import gen_text
from styles import MODEL_SELECTION_CSS
from js import GET_LOCAL_STORAGE, UPDATE_LEFT_BTNS_STATE, UPDATE_PLACEHOLDERS
from templates import templates
from constants import DEFAULT_GLOBAL_CTX
from pingpong import PingPong
from pingpong.context import CtxLastWindowStrategy
TOKEN = os.getenv('HF_TOKEN')
MODEL_ID = 'meta-llama/Llama-2-70b-chat-hf'
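# build_prompts assembles the final prompt sent to the model: it deep-copies
# the PingPong manager so the stored history stays untouched, overrides its
# ctx with the user-editable global context, and keeps only the last
# `win_size` exchanges via CtxLastWindowStrategy.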
def build_prompts(ppmanager, global_context, win_size=3):
    dummy_ppm = copy.deepcopy(ppmanager)
    dummy_ppm.ctx = global_context
    lws = CtxLastWindowStrategy(win_size)
    return lws(dummy_ppm)

# Example prompts and channel names are stored one per line in plain-text files
with open("examples.txt", "r") as ex_file:
    examples = ex_file.read().splitlines()
ex_btns = []

with open("channels.txt", "r") as chl_file:
    channels = chl_file.read().splitlines()
channel_btns = []

def get_placeholders(text):
    """Returns all substrings enclosed in square brackets, e.g. "[placeholder]"."""
    pattern = r"\[([^\]]*)\]"
    matches = re.findall(pattern, text)
    return matches

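# A quick illustration (not part of the original app flow):
#   get_placeholders("Write a [tone] email about [topic]")  ->  ["tone", "topic"]
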
def fill_up_placeholders(txt):
    placeholders = get_placeholders(txt)
    highlighted_txt = txt

    return (
        gr.update(
            visible=True,
            value=highlighted_txt
        ),
        gr.update(
            visible=len(placeholders) >= 1,
            placeholder=placeholders[0] if len(placeholders) >= 1 else ""
        ),
        gr.update(
            visible=len(placeholders) >= 2,
            placeholder=placeholders[1] if len(placeholders) >= 2 else ""
        ),
        gr.update(
            visible=len(placeholders) >= 3,
            placeholder=placeholders[2] if len(placeholders) >= 3 else ""
        ),
        "" if len(placeholders) >= 1 else txt
    )

async def rollback_last(
    idx, local_data, chat_state,
    global_context, res_temp, res_topk, res_rpen, res_mnts, res_sample, ctx_num_lconv
):
    res = [
        chat_state["ppmanager_type"].from_json(json.dumps(ppm))
        for ppm in local_data
    ]

    # Drop the last exchange, then replay the last user message for regeneration
    ppm = res[idx]
    last_user_message = ppm.pingpongs[-1].ping
    ppm.pingpongs = ppm.pingpongs[:-1]

    ppm.add_pingpong(
        PingPong(last_user_message, "")
    )
    prompt = build_prompts(ppm, global_context, ctx_num_lconv)

    async for result in gen_text(
        prompt, hf_model=MODEL_ID, hf_token=TOKEN,
        parameters={
            'max_new_tokens': res_mnts,
            'do_sample': res_sample,
            'return_full_text': False,
            'temperature': res_temp,
            'top_k': res_topk,
            'repetition_penalty': res_rpen
        }
    ):
        ppm.append_pong(result)
        yield prompt, ppm.build_uis(), str(res), gr.update(interactive=False)

    yield prompt, ppm.build_uis(), str(res), gr.update(interactive=True)

def reset_chat(idx, ld, state):
    res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
    res[idx].pingpongs = []

    return (
        "",
        [],
        str(res),
        gr.update(visible=True),
        gr.update(interactive=False),
    )

async def chat_stream(
    idx, local_data, instruction_txtbox, chat_state,
    global_context, res_temp, res_topk, res_rpen, res_mnts, res_sample, ctx_num_lconv
):
    res = [
        chat_state["ppmanager_type"].from_json(json.dumps(ppm))
        for ppm in local_data
    ]

    ppm = res[idx]
    ppm.add_pingpong(
        PingPong(instruction_txtbox, "")
    )
    prompt = build_prompts(ppm, global_context, ctx_num_lconv)

    # Stream partial responses into the UI; re-enable the Regen button when done
    async for result in gen_text(
        prompt, hf_model=MODEL_ID, hf_token=TOKEN,
        parameters={
            'max_new_tokens': res_mnts,
            'do_sample': res_sample,
            'return_full_text': False,
            'temperature': res_temp,
            'top_k': res_topk,
            'repetition_penalty': res_rpen
        }
    ):
        ppm.append_pong(result)
        yield "", prompt, ppm.build_uis(), str(res), gr.update(interactive=False)

    yield "", prompt, ppm.build_uis(), str(res), gr.update(interactive=True)

def channel_num(btn_title):
    choice = 0

    for idx, channel in enumerate(channels):
        if channel == btn_title:
            choice = idx

    return choice

def set_chatbot(btn, ld, state):
    choice = channel_num(btn)

    res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
    empty = len(res[choice].pingpongs) == 0
    return (res[choice].build_uis(), choice, gr.update(visible=empty), gr.update(interactive=not empty))

def set_example(btn):
    return btn, gr.update(visible=False)

def get_final_template(
    txt, placeholder_txt1, placeholder_txt2, placeholder_txt3
):
    placeholders = get_placeholders(txt)
    example_prompt = txt

    if len(placeholders) >= 1:
        if placeholder_txt1 != "":
            example_prompt = example_prompt.replace(f"[{placeholders[0]}]", placeholder_txt1)
    if len(placeholders) >= 2:
        if placeholder_txt2 != "":
            example_prompt = example_prompt.replace(f"[{placeholders[1]}]", placeholder_txt2)
    if len(placeholders) >= 3:
        if placeholder_txt3 != "":
            example_prompt = example_prompt.replace(f"[{placeholders[2]}]", placeholder_txt3)

    return (
        example_prompt,
        "",
        "",
        ""
    )
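
# A quick illustration (not part of the original app flow):
#   get_final_template("Write a [tone] email about [topic]", "friendly", "the launch", "")
#   -> ("Write a friendly email about the launch", "", "", "")
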
with gr.Blocks(css=MODEL_SELECTION_CSS, theme='gradio/soft') as demo:
    with gr.Column() as chat_view:
        idx = gr.State(0)
        chat_state = gr.State({
            "ppmanager_type": GradioLLaMA2ChatPPManager
        })
        local_data = gr.JSON({}, visible=False)

        gr.Markdown("## LLaMA2 70B with Gradio Chat and Hugging Face Inference API", elem_classes=["center"])
        gr.Markdown(
            "This space demonstrates how to build a feature-rich chatbot UI in [Gradio](https://www.gradio.app/). Supported features "
            "include • multiple chat channels, • chat history save/restore, • stopping text generation, • regenerating the "
            "last response, • clearing the chat history, • dynamic kick-starting prompt templates, • adjustable text generation parameters, "
            "and • inspecting the actual prompt that the model sees. The underlying Large Language Model is [Meta AI](https://ai.meta.com/)'s "
            "[LLaMA2-70B](https://huggingface.co./meta-llama/Llama-2-70b-chat-hf), hosted on the [Hugging Face Inference API](https://huggingface.co./inference-api) "
            "and served with [Text Generation Inference](https://github.com/huggingface/text-generation-inference).",
            elem_classes=["center"]
        )
        gr.Markdown(
            "***NOTE:*** If you subscribe to [PRO](https://huggingface.co./pricing#pro), you can simply duplicate this space and use your "
            "Hugging Face Access Token to run the same application. Just add an `HF_TOKEN` secret with the Token value according to "
            "[this guide](https://huggingface.co./docs/hub/spaces-overview#managing-secrets-and-environment-variables).",
            elem_classes=["center"]
        )
        with gr.Row():
            with gr.Column(scale=1, min_width=180):
                gr.Markdown("GradioChat", elem_id="left-top")

                with gr.Column(elem_id="left-pane"):
                    with gr.Accordion("Histories", elem_id="chat-history-accordion", open=True):
                        channel_btns.append(gr.Button(channels[0], elem_classes=["custom-btn-highlight"]))

                        for channel in channels[1:]:
                            channel_btns.append(gr.Button(channel, elem_classes=["custom-btn"]))

            with gr.Column(scale=8, elem_id="right-pane"):
                with gr.Column(
                    elem_id="initial-popup", visible=False
                ) as example_block:
                    with gr.Row(scale=1):
                        with gr.Column(elem_id="initial-popup-left-pane"):
                            gr.Markdown("GradioChat", elem_id="initial-popup-title")
                            gr.Markdown("Making the community's best AI chat models available to everyone.")
                        with gr.Column(elem_id="initial-popup-right-pane"):
                            gr.Markdown("Chat UI is now open sourced on Hugging Face Hub")
                            gr.Markdown("check out the [↗ repository](https://huggingface.co./spaces/chansung/test-multi-conv)")

                    with gr.Column(scale=1):
                        gr.Markdown("Examples")

                        with gr.Row():
                            for example in examples:
                                ex_btns.append(gr.Button(example, elem_classes=["example-btn"]))

                with gr.Column(elem_id="aux-btns-popup", visible=True):
                    with gr.Row():
                        # stop = gr.Button("Stop", elem_classes=["aux-btn"])
                        regenerate = gr.Button("Regen", interactive=False, elem_classes=["aux-btn"])
                        clean = gr.Button("Clean", elem_classes=["aux-btn"])

                with gr.Accordion("Context Inspector", elem_id="aux-viewer", open=False):
                    context_inspector = gr.Textbox(
                        "",
                        elem_id="aux-viewer-inspector",
                        label="",
                        lines=30,
                        max_lines=50,
                    )

                chatbot = gr.Chatbot(elem_id='chatbot', label="LLaMA2-70B-Chat")
                instruction_txtbox = gr.Textbox(placeholder="Ask anything", label="", elem_id="prompt-txt")
with gr.Accordion("Example Templates", open=False): | |
template_txt = gr.Textbox(visible=False) | |
template_md = gr.Markdown(label="Chosen Template", visible=False, elem_classes="template-txt") | |
with gr.Row(): | |
placeholder_txt1 = gr.Textbox(label="placeholder #1", visible=False, interactive=True) | |
placeholder_txt2 = gr.Textbox(label="placeholder #2", visible=False, interactive=True) | |
placeholder_txt3 = gr.Textbox(label="placeholder #3", visible=False, interactive=True) | |
for template in templates: | |
with gr.Tab(template['title']): | |
gr.Examples( | |
template['template'], | |
inputs=[template_txt], | |
outputs=[template_md, placeholder_txt1, placeholder_txt2, placeholder_txt3, instruction_txtbox], | |
run_on_click=True, | |
fn=fill_up_placeholders, | |
) | |
with gr.Accordion("Control Panel", open=False) as control_panel: | |
with gr.Column(): | |
with gr.Column(): | |
gr.Markdown("#### Global context") | |
with gr.Accordion("global context will persist during conversation, and it is placed at the top of the prompt", open=True): | |
global_context = gr.Textbox( | |
DEFAULT_GLOBAL_CTX, | |
lines=5, | |
max_lines=10, | |
interactive=True, | |
elem_id="global-context" | |
) | |
# gr.Markdown("#### Internet search") | |
# with gr.Row(): | |
# internet_option = gr.Radio(choices=["on", "off"], value="off", label="mode") | |
# serper_api_key = gr.Textbox( | |
# value= "" if args.serper_api_key is None else args.serper_api_key, | |
# placeholder="Get one by visiting serper.dev", | |
# label="Serper api key" | |
# ) | |
gr.Markdown("#### GenConfig for **response** text generation") | |
with gr.Row(): | |
res_temp = gr.Slider(0.0, 2.0, 1.0, step=0.1, label="temp", interactive=True) | |
res_topk = gr.Slider(20, 1000, 50, step=1, label="top_k", interactive=True) | |
res_rpen = gr.Slider(0.0, 2.0, 1.2, step=0.1, label="rep_penalty", interactive=True) | |
res_mnts = gr.Slider(64, 8192, 512, step=1, label="new_tokens", interactive=True) | |
res_sample = gr.Radio([True, False], value=True, label="sample", interactive=True) | |
with gr.Column(): | |
gr.Markdown("#### Context managements") | |
with gr.Row(): | |
ctx_num_lconv = gr.Slider(2, 10, 3, step=1, label="number of recent talks to keep", interactive=True) | |
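
        # --- Event wiring ---
        # Submitting the prompt hides the initial example popup, streams the model's
        # response into the chatbot via chat_stream, and finally persists the updated
        # history to the browser's local storage through a small JS callback.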
        send_event = instruction_txtbox.submit(
            lambda: [
                gr.update(visible=False),
                gr.update(interactive=True)
            ],
            None,
            [example_block, regenerate]
        ).then(
            chat_stream,
            [idx, local_data, instruction_txtbox, chat_state,
             global_context, res_temp, res_topk, res_rpen, res_mnts, res_sample, ctx_num_lconv],
            [instruction_txtbox, context_inspector, chatbot, local_data, regenerate]
        ).then(
            None, local_data, None,
            _js="(v)=>{ setStorage('local_data',v) }"
        )
        # regen_event1 = regenerate.click(
        #     rollback_last,
        #     [idx, local_data, chat_state],
        #     [instruction_txtbox, chatbot, local_data, regenerate]
        # )
        # regen_event2 = regen_event1.then(
        #     chat_stream,
        #     [idx, local_data, instruction_txtbox, chat_state,
        #      global_context, res_temp, res_topk, res_rpen, res_mnts, res_sample, ctx_num_lconv],
        #     [context_inspector, chatbot, local_data]
        # )
        # regen_event3 = regen_event2.then(
        #     lambda: gr.update(interactive=True),
        #     None,
        #     regenerate
        # )
        # regen_event4 = regen_event3.then(
        #     None, local_data, None,
        #     _js="(v)=>{ setStorage('local_data',v) }"
        # )

        regen_event = regenerate.click(
            rollback_last,
            [idx, local_data, chat_state,
             global_context, res_temp, res_topk, res_rpen, res_mnts, res_sample, ctx_num_lconv],
            [context_inspector, chatbot, local_data, regenerate]
        ).then(
            None, local_data, None,
            _js="(v)=>{ setStorage('local_data',v) }"
        )

        # stop.click(
        #     lambda: gr.update(interactive=True), None, regenerate,
        #     cancels=[send_event, regen_event]
        # )
        for btn in channel_btns:
            btn.click(
                set_chatbot,
                [btn, local_data, chat_state],
                [chatbot, idx, example_block, regenerate]
            ).then(
                None, btn, None,
                _js=UPDATE_LEFT_BTNS_STATE
            )

        for btn in ex_btns:
            btn.click(
                set_example,
                [btn],
                [instruction_txtbox, example_block]
            )

        clean.click(
            reset_chat,
            [idx, local_data, chat_state],
            [instruction_txtbox, chatbot, local_data, example_block, regenerate]
        ).then(
            None, local_data, None,
            _js="(v)=>{ setStorage('local_data',v) }"
        )
        placeholder_txt1.change(
            inputs=[template_txt, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            outputs=[template_md],
            show_progress=False,
            _js=UPDATE_PLACEHOLDERS,
            fn=None
        )
        placeholder_txt2.change(
            inputs=[template_txt, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            outputs=[template_md],
            show_progress=False,
            _js=UPDATE_PLACEHOLDERS,
            fn=None
        )
        placeholder_txt3.change(
            inputs=[template_txt, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            outputs=[template_md],
            show_progress=False,
            _js=UPDATE_PLACEHOLDERS,
            fn=None
        )

        placeholder_txt1.submit(
            inputs=[template_txt, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            outputs=[instruction_txtbox, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            fn=get_final_template
        )
        placeholder_txt2.submit(
            inputs=[template_txt, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            outputs=[instruction_txtbox, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            fn=get_final_template
        )
        placeholder_txt3.submit(
            inputs=[template_txt, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            outputs=[instruction_txtbox, placeholder_txt1, placeholder_txt2, placeholder_txt3],
            fn=get_final_template
        )
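
        # Restore previously saved chat history from the browser's local storage
        # when the page loads (GET_LOCAL_STORAGE is a JS snippet imported from js)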
        demo.load(
            None,
            inputs=None,
            outputs=[chatbot, local_data],
            _js=GET_LOCAL_STORAGE,
        )

demo.queue(concurrency_count=5, max_size=256).launch()