import gradio as gr
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

# Select the device to run on
device = "cuda" if torch.cuda.is_available() else "cpu"

# Specify the model. Qwen2-VL is supported natively in transformers (>= 4.45),
# so trust_remote_code is not needed. The instruction-tuned checkpoint is
# "Qwen/Qwen2-VL-7B-Instruct"; the base "Qwen/Qwen2-VL-7B" is not chat-tuned.
model_id = "Qwen/Qwen2-VL-7B-Instruct"

# Load the model and processor. Qwen2-VL is a vision-language model, so it is
# loaded with Qwen2VLForConditionalGeneration and an AutoProcessor (which
# handles both images and text), not AutoModelForCausalLM plus a tokenizer.
processor = AutoProcessor.from_pretrained(model_id)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype="auto",
).eval()


def respond(image, prompt, history):
    # Qwen2-VL has no model.chat() method; use the chat template + generate()
    # workflow from the model card instead.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[text], images=[image], padding=True, return_tensors="pt"
    ).to(device)

    # do_sample=False means greedy decoding (not beam search, which would
    # additionally require num_beams > 1)
    generated_ids = model.generate(**inputs, max_new_tokens=512, do_sample=False)
    # Strip the prompt tokens so only the newly generated reply is decoded
    generated_ids = [
        out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)
    ]
    response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Append this turn to the history; the same list of (user, assistant)
    # pairs feeds both the Chatbot display and the State
    history = history + [(prompt, response)]
    return history, history


with gr.Blocks() as demo:
    gr.Markdown(f"## Qwen2-VL-7B Demo (Model: {model_id})")
    with gr.Row():
        with gr.Column(scale=4):
            image = gr.Image(type="pil", label="Image")
            text_input = gr.Textbox(label="Prompt", placeholder="Enter a prompt")
            submit_button = gr.Button("Submit")
        with gr.Column(scale=6):
            chatbot = gr.Chatbot(label="Chatbot")
    history = gr.State([])

    submit_button.click(
        respond,
        inputs=[image, text_input, history],
        outputs=[chatbot, history],
    )

demo.queue().launch(
    server_name="0.0.0.0", server_port=7860, share=True
)  # share=True generates a public link