import gradio as gr
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

# Select the device to run on
device = "cuda" if torch.cuda.is_available() else "cpu"

# Specify the model. Qwen2-VL is supported natively in transformers (>= 4.45),
# so trust_remote_code is not needed. The instruction-tuned checkpoint is
# "Qwen/Qwen2-VL-7B-Instruct"; the base "Qwen/Qwen2-VL-7B" is not chat-tuned.
model_id = "Qwen/Qwen2-VL-7B-Instruct"

# Load the model and processor. Qwen2-VL is a vision-language model, so it is
# loaded with Qwen2VLForConditionalGeneration and an AutoProcessor (which
# handles both images and text), not AutoModelForCausalLM plus a tokenizer.
processor = AutoProcessor.from_pretrained(model_id)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype="auto",
).eval()


def respond(image, prompt, history):
    # Qwen2-VL has no model.chat() method; use the chat template + generate()
    # workflow from the model card instead.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[text], images=[image], padding=True, return_tensors="pt"
    ).to(device)

    # do_sample=False means greedy decoding (not beam search, which would
    # additionally require num_beams > 1)
    generated_ids = model.generate(**inputs, max_new_tokens=512, do_sample=False)
    # Strip the prompt tokens so only the newly generated reply is decoded
    generated_ids = [
        out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)
    ]
    response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Append this turn to the history; the same list of (user, assistant)
    # pairs feeds both the Chatbot display and the State
    history = history + [(prompt, response)]
    return history, history


with gr.Blocks() as demo:
    gr.Markdown(f"## Qwen2-VL-7B Demo (Model: {model_id})")
    with gr.Row():
        with gr.Column(scale=4):
            image = gr.Image(type="pil", label="Image")
            text_input = gr.Textbox(label="Prompt", placeholder="Enter a prompt")
            submit_button = gr.Button("Submit")
        with gr.Column(scale=6):
            chatbot = gr.Chatbot(label="Chatbot")
    history = gr.State([])

    submit_button.click(
        respond,
        inputs=[image, text_input, history],
        outputs=[chatbot, history],
    )

demo.queue().launch(
    server_name="0.0.0.0", server_port=7860, share=True
)  # share=True generates a public link