# nanollava-gguf / app.py
import base64
from io import BytesIO
import gradio as gr
import spaces
from llama_cpp import Llama
from llama_cpp.llama_chat_format import NanoLlavaChatHandler
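# Download the multimodal projector (mmproj) GGUF from the Hub; the chat handler
# uses it to embed images so they can be passed to the text model in the prompt.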
chat_handler = NanoLlavaChatHandler.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*mmproj*",
)
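# Load the quantized NanoLLaVA text model from the same repo; the glob pattern
# selects the text-model GGUF, and n_gpu_layers=-1 offloads all layers to the GPU.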
llm = Llama.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*text-model*",
    chat_handler=chat_handler,
    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
    n_gpu_layers=-1,
    flash_attn=True,
)
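# On ZeroGPU Spaces, this decorator allocates a GPU for up to 30 seconds per call.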
@spaces.GPU(duration=30)
def answer_question(img, prompt):
    img_bytes = BytesIO()
    # Flatten any alpha channel first; JPEG cannot encode transparency.
    img.convert("RGB").save(img_bytes, format='JPEG')
    # Encode the bytes object to a base64-encoded data URL
    data_url = 'data:image/jpeg;base64,' + base64.b64encode(img_bytes.getvalue()).decode()
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": data_url},
                ],
            }
        ],
        stream=True,
    )
    # Accumulate the streamed deltas: each yield replaces the Gradio output value,
    # so yielding the running text keeps the whole response visible as it streams.
    text = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            text += delta["content"]
            yield text
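# Gradio UI: a prompt box and submit button, then an image upload and the streamed response.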
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # NanoLLaVA
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")
    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)
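# Queueing is required so the generator can stream partial responses to the UI.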
demo.queue().launch()