import gradio as gr
from gradio_client import Client, handle_file
from huggingface_hub import InferenceClient

# Initialize API clients: the Moondream Space for vision, QwQ for text generation
moondream_client = Client("vikhyatk/moondream2")
qwq_client = InferenceClient("Qwen/QwQ-32B-Preview")
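# Note: both clients hit remote endpoints. Depending on the model's Inference
# API availability you may need to authenticate, e.g. by passing an HF access
# token via InferenceClient(..., token=...).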

# Describe an image with the Moondream API, then answer the user via QwQ
def describe_image(image, user_message):
    description = moondream_client.predict(
        img=handle_file(image),
        prompt="Describe this image.",
        api_name="/answer_question",
    )
    # Feed the description plus the user's question to QwQ
    qwq_result = qwq_client.chat_completion(
        messages=[{"role": "user", "content": f"{description}\n{user_message}"}],
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )
    return qwq_result.choices[0].message.content
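
# Quick manual check (hypothetical path; calls two remote endpoints, so run it
# deliberately rather than at import time):
#   print(describe_image("example.jpg", "What stands out in this image?"))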

# Handle a chat turn that may include an attached image
def chat_or_image(message, history, max_new_tokens=250):
    txt = message["text"]
    messages = []

    # Replay prior turns as plain chat messages. In a multimodal ChatInterface,
    # image uploads appear in history as (filepath,) tuples; each image was
    # already described in its own turn, so only text turns are replayed.
    for user_turn, assistant_turn in history:
        if isinstance(user_turn, str):
            messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})

    # If the current message carries an image, prepend Moondream's description
    # so the text-only QwQ model can reason about it.
    if message["files"]:
        file_entry = message["files"][0]
        # Example images arrive as path strings; uploads may be dicts with "path"
        path = file_entry if isinstance(file_entry, str) else file_entry["path"]
        description = moondream_client.predict(
            img=handle_file(path),
            prompt="Describe this image.",
            api_name="/answer_question",
        )
        txt = f"Image description: {description}\n{txt}"
    messages.append({"role": "user", "content": txt})
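
    # At this point `messages` is an OpenAI-style list, e.g.:
    #   [{"role": "user", "content": "hi"},
    #    {"role": "assistant", "content": "hello"},
    #    {"role": "user", "content": "Image description: ...\n<question>"}]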

    # Stream the QwQ completion, yielding the accumulated text so the chat
    # bubble updates incrementally.
    buffer = ""
    for chunk in qwq_client.chat_completion(
        messages=messages,
        max_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.95,
        stream=True,
    ):
        buffer += chunk.choices[0].delta.content or ""
        yield buffer
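    # Yielding the growing buffer (not each delta) matters: Gradio replaces the
    # displayed response with every value a generator yields.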


# Multimodal chat UI: a chat box with optional image uploads, matching the
# {"text": ..., "files": [...]} payload that chat_or_image expects
demo = gr.ChatInterface(
    fn=chat_or_image,
    multimodal=True,
    title="Moondream + QwQ Multimodal Chatbot",
    description="Chat with Moondream and QwQ. Upload an image, ask a question, or both!",
)
if __name__ == "__main__":
    demo.launch(show_error=True)
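    # Hypothetical variation: demo.launch(share=True) would also expose a
    # temporary public URL when running locally.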