import base64
from io import BytesIO
import gradio as gr
import spaces
from llama_cpp import Llama
from llama_cpp.llama_chat_format import NanoLlavaChatHandler
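
# Download the NanoLLaVA multimodal projector (mmproj) GGUF from the Hugging Face Hub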
chat_handler = NanoLlavaChatHandler.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*mmproj*",
)
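
# Load the text model GGUF and attach the chat handler so images can be passed in chat messages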
llm = Llama.from_pretrained(
    repo_id="abetlen/nanollava-gguf",
    filename="*text-model*",
    chat_handler=chat_handler,
    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
    n_gpu_layers=-1,  # offload all layers to the GPU
    flash_attn=True,
)
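
# Request ZeroGPU hardware for up to 30 seconds per call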
@spaces.GPU(duration=30)
def answer_question(img, prompt):
    img_bytes = BytesIO()
    # Convert to RGB first so images with an alpha channel can be saved as JPEG
    img.convert("RGB").save(img_bytes, format="JPEG")
    # Encode the bytes object to a base64-encoded data URL
    data_url = "data:image/jpeg;base64," + base64.b64encode(img_bytes.getvalue()).decode()
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": data_url},
                ],
            }
        ],
        stream=True,
    )
    # Gradio replaces the output on every yield, so stream the accumulated text
    answer = ""
    for chunk in response:
        if "content" in chunk["choices"][0]["delta"]:
            answer += chunk["choices"][0]["delta"]["content"]
            yield answer
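
# Build the Gradio UI: image and prompt inputs, streamed text output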
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # NanoLLaVA
        """
    )
    with gr.Row():
        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        output = gr.TextArea(label="Response")
    submit.click(answer_question, [img, prompt], output)
    prompt.submit(answer_question, [img, prompt], output)
demo.queue().launch()