# nanollava-gguf / app.py
import base64
from io import BytesIO
import gradio as gr
import spaces
from llama_cpp import Llama
from llama_cpp.llama_chat_format import NanoLlavaChatHandler
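# Download the multimodal projector (mmproj) GGUF from the Hub; the chat handler
# uses it to embed images so they can be passed to the text model in the prompt.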
chat_handler = NanoLlavaChatHandler.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*mmproj*",
)
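# Load the quantized NanoLLaVA text model from the same repo; the glob pattern
# selects the text-model GGUF, and n_gpu_layers=-1 offloads all layers to the GPU.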
llm = Llama.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*text-model*",
    chat_handler=chat_handler,
    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
    n_gpu_layers=-1,
    flash_attn=True,
)
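# On ZeroGPU Spaces, this decorator allocates a GPU for up to 30 seconds per call.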
@spaces.GPU(duration=30)
def answer_question(img, prompt):
    img_bytes = BytesIO()
    # Flatten any alpha channel first; JPEG cannot encode transparency.
    img.convert("RGB").save(img_bytes, format='JPEG')
    # Encode the bytes object to a base64-encoded data URL
    data_url = 'data:image/jpeg;base64,' + base64.b64encode(img_bytes.getvalue()).decode()
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": data_url},
                ],
            }
        ],
        stream=True,
    )
    # Accumulate the streamed deltas: each yield replaces the Gradio output value,
    # so yielding the running text keeps the whole response visible as it streams.
    text = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            text += delta["content"]
            yield text
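# Gradio UI: a prompt box and submit button, then an image upload and the streamed response.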
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # NanoLLaVA
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")
    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)
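# Queueing is required so the generator can stream partial responses to the UI.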
demo.queue().launch()