import base64
from io import BytesIO

import gradio as gr
import spaces
from llama_cpp import Llama
from llama_cpp.llama_chat_format import NanoLlavaChatHandler

# Download the multimodal projector and the text model from the Hugging Face Hub.
chat_handler = NanoLlavaChatHandler.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*mmproj*",
)

llm = Llama.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*text-model*",
    chat_handler=chat_handler,
    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
    n_gpu_layers=-1,
    flash_attn=True,
)


@spaces.GPU(duration=30)
def answer_question(img, prompt):
    if img is None:
        yield "Please upload an image first."
        return

    # Serialize the image to JPEG and wrap it in a base64 data URL, which the
    # chat handler accepts in place of a remote image URL. Convert to RGB
    # first, since JPEG cannot encode RGBA images (e.g. PNG uploads).
    img_bytes = BytesIO()
    img.convert("RGB").save(img_bytes, format="JPEG")
    data_url = "data:image/jpeg;base64," + base64.b64encode(img_bytes.getvalue()).decode()

    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": data_url},
                ],
            }
        ],
        stream=True,
    )

    # Each yield from a Gradio generator replaces the output component, so
    # accumulate the streamed tokens and yield the running text rather than
    # individual deltas (which would show only the latest token).
    text = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            text += delta["content"]
            yield text


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # NanoLLaVA
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")

    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)

demo.queue().launch()
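
# --- Usage note (a sketch, not part of the app) ---
# `answer_question` is a plain generator, so it can also be exercised without
# the Gradio UI, e.g. for quick local testing. The image path below is
# hypothetical:
#
#   from PIL import Image
#   for partial in answer_question(Image.open("example.jpg"), "Describe this image."):
#       print(partial)  # each yield is the accumulated response so far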