AdrienB134 committed on
Commit
86019ea
·
verified ·
1 Parent(s): 8575432

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -11
app.py CHANGED
@@ -20,7 +20,7 @@ import time
20
  from PIL import Image
21
  import torch
22
  import subprocess
23
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
24
 
25
 
26
 
@@ -32,11 +32,7 @@ def model_inference(
32
  images, text,
33
  ):
34
 
35
- # print(type(images))
36
- # print(images[0])
37
- # images = Image.open(images[0][0])
38
- # print(images)
39
- # print(type(images))
40
  images = [{"type": "image", "image": Image.open(image[0])} for image in images]
41
  images.append({"type": "text", "text": text})
42
 
@@ -47,7 +43,7 @@ def model_inference(
47
  #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
48
  model = Qwen2VLForConditionalGeneration.from_pretrained(
49
  "Qwen/Qwen2-VL-2B-Instruct",
50
- attn_implementation="flash_attention_2", #doesn't work on zerogpu WTF?!
51
  trust_remote_code=True,
52
  torch_dtype=torch.bfloat16).to("cuda:0")
53
 
@@ -55,10 +51,6 @@ def model_inference(
55
  min_pixels = 256*28*28
56
  max_pixels = 1280*28*28
57
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
58
-
59
- # The default range for the number of visual tokens per image in the model is 4-16384. You can set min_pixels and max_pixels according to your needs, such as a token count range of 256-1280, to balance speed and memory usage.
60
- # min_pixels = 256*28*28
61
- # max_pixels = 1280*28*28
62
  # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
63
 
64
  messages = [
 
20
  from PIL import Image
21
  import torch
22
  import subprocess
23
+ #subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
24
 
25
 
26
 
 
32
  images, text,
33
  ):
34
 
35
+
 
 
 
 
36
  images = [{"type": "image", "image": Image.open(image[0])} for image in images]
37
  images.append({"type": "text", "text": text})
38
 
 
43
  #We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
44
  model = Qwen2VLForConditionalGeneration.from_pretrained(
45
  "Qwen/Qwen2-VL-2B-Instruct",
46
+ #attn_implementation="flash_attention_2", #doesn't work on zerogpu WTF?!
47
  trust_remote_code=True,
48
  torch_dtype=torch.bfloat16).to("cuda:0")
49
 
 
51
  min_pixels = 256*28*28
52
  max_pixels = 1280*28*28
53
  processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
 
 
 
 
54
  # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
55
 
56
  messages = [