import base64
from io import BytesIO
import gradio as gr
import spaces
from llama_cpp import Llama
from llama_cpp.llama_chat_format import NanoLlavaChatHandler
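
# Download the NanoLLaVA multimodal projector (mmproj) GGUF from the Hugging Face Hub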
chat_handler = NanoLlavaChatHandler.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*mmproj*",
)
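
# Load the text model GGUF and attach the chat handler so images can be passed in chat messages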
llm = Llama.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*text-model*",
    chat_handler=chat_handler,
    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
    n_gpu_layers=-1,  # offload all layers to the GPU
    flash_attn=True,
)
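
# Request ZeroGPU hardware for up to 30 seconds per call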
@spaces.GPU(duration=30)
def answer_question(img, prompt):
    img_bytes = BytesIO()
    # Convert to RGB first so images with an alpha channel can be saved as JPEG
    img.convert("RGB").save(img_bytes, format="JPEG")
    # Encode the bytes object to a base64-encoded data URL
    data_url = "data:image/jpeg;base64," + base64.b64encode(img_bytes.getvalue()).decode()
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": data_url},
                ],
            }
        ],
        stream=True,
    )
    # Gradio replaces the output on every yield, so stream the accumulated text
    answer = ""
    for chunk in response:
        if "content" in chunk["choices"][0]["delta"]:
            answer += chunk["choices"][0]["delta"]["content"]
            yield answer
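
# Build the Gradio UI: image and prompt inputs, streamed text output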
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # NanoLLaVA
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")
    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)
demo.queue().launch()