tanshaohui commited on
Commit
1e8121a
·
1 Parent(s): ec9d468

feat: qwen2-vl test

Browse files
Files changed (2) hide show
  1. app.py +46 -6
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,14 +1,54 @@
1
  import gradio as gr
2
  import spaces
3
  import torch
 
 
4
 
5
- zero = torch.Tensor([0]).cuda()
6
- print(zero.device) # <-- 'cpu' 🤔
 
 
 
 
 
7
 
8
  @spaces.GPU
9
- def greet(n):
10
- print(zero.device) # <-- 'cuda:0' 🤗
11
- return f"Hello {zero + n} Tensor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
14
  demo.launch()
 
1
  import gradio as gr
2
  import spaces
3
  import torch
4
+ import os
5
+ import subprocess
6
 
7
# --- One-time model setup (runs at import) ---
# Qwen2-VL support is not in a released transformers version at the time of
# writing, so install straight from the main branch.
# NOTE(review): a runtime pip install is fragile on Spaces; pin a transformers
# release in requirements.txt once Qwen2-VL support ships.
subprocess.run(
    ["pip", "install", "git+https://github.com/huggingface/transformers"],
    check=True,  # fail fast instead of silently continuing with a stale version
)

from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info

MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"

# torch_dtype="auto" loads the checkpoint in its native precision instead of
# the fp32 default, halving memory for the 7B model.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID, torch_dtype="auto"
).cuda()
processor = AutoProcessor.from_pretrained(MODEL_ID)
14
 
15
@spaces.GPU
def infer(n):
    """Answer a user prompt about a fixed demo image with Qwen2-VL.

    Args:
        n: The user prompt. Coerced to ``str``; when ``None`` or empty it
           falls back to a default prompt asking for a markdown
           transcription of the text in the image.

    Returns:
        str: The model's decoded answer.
    """
    # Bug fix: the UI component may deliver a non-string (the original
    # Interface used gr.Number, so len(n) raised TypeError on a float) —
    # coerce before the length check.
    prompt = "" if n is None else str(n)
    if len(prompt) < 1:
        prompt = "请将图里文字转成markdown"
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": "https://lf3-static.bytednsdoc.com/obj/eden-cn/pbovhozuha/screenshot-20240923-164458.png",
                },
                {"type": "text", "text": prompt},
            ],
        }
    ]
    # Render the chat template as plain text; tokenization happens below in
    # the processor call together with the image inputs.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)
    generated_ids = model.generate(**inputs, max_new_tokens=512)
    # Drop the prompt tokens so only the newly generated tokens are decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Bug fix: batch_decode returns a list of strings; the gr.Text output
    # component expects a single string, so return the sole batch element.
    return output_text[0]
51
+
52
 
53
# Bug fix: the prompt is free text, so the input must be a Textbox — the
# original gr.Number cannot hold a text prompt and its float value made
# infer's len() check raise TypeError.
demo = gr.Interface(
    fn=infer,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Text(label="Model output"),
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ qwen-vl-utils