themanas021 committed on
Commit
d436c8a
·
verified ·
1 Parent(s): c2bab7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -50
app.py CHANGED
@@ -1,22 +1,11 @@
1
  import gradio as gr
2
  import base64
3
  from PIL import Image
4
- import torch
5
- from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
6
- from qwen_vl_utils import process_vision_info
7
-
8
- model = Qwen2VLForConditionalGeneration.from_pretrained(
9
- "Qwen/Qwen2-VL-2B-Instruct",
10
- torch_dtype="auto",
11
- device_map="auto",
12
- )
13
-
14
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
15
 
16
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
- model.to(device)
18
 
19
- # Function to encode images into base64
20
  def encode_images(image_paths):
21
  base64_images = []
22
  for image_path in image_paths:
@@ -25,24 +14,22 @@ def encode_images(image_paths):
25
  base64_images.append(f"data:image/jpeg;base64,{base64_image}")
26
  return base64_images
27
 
28
- # Function to resize images to a uniform shape
29
  def resize_images(image_paths, target_size=(224, 224)):
30
  resized_images = []
31
  for image_path in image_paths:
32
  img = Image.open(image_path)
33
- img_resized = img.resize(target_size) # Resize image to target size
34
  resized_images.append(img_resized)
35
  return resized_images
36
 
37
  def generate_testing_instructions(images, context):
38
- # Resize all images to a uniform shape (e.g., 224x224)
39
  resized_images = resize_images(images)
40
 
41
- # Encode resized images to base64
42
  base64_images = encode_images(images)
43
 
44
- # Prepare messages with the base64-encoded images
45
- messages = [
 
46
  {
47
  "role": "user",
48
  "content": [
@@ -70,35 +57,14 @@ Please demonstrate your approach using the following features of a mobile app:
70
  ]
71
  }
72
  for base64_image in base64_images
73
- ]
74
-
75
- text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
76
-
77
- # Create input tensors
78
- inputs = processor(
79
- text=[text_prompt],
80
- images=resized_images, # Use resized images for model input
81
- padding=True,
82
- return_tensors="pt"
83
- )
84
-
85
- # Move tensors to GPU if available
86
- inputs = inputs.to(device)
87
-
88
- # Generate output
89
- output_ids = model.generate(**inputs, max_new_tokens=1024)
90
-
91
- generated_ids = [
92
- output_ids[len(input_ids):]
93
- for input_ids, output_ids in zip(inputs.input_ids, output_ids)
94
- ]
95
-
96
- # Decode the output text
97
- output_text = processor.batch_decode(
98
- generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
99
- )
100
-
101
- return output_text
102
 
103
  # Create the Gradio interface
104
  with gr.Blocks() as demo:
@@ -113,7 +79,6 @@ with gr.Blocks() as demo:
113
 
114
  button = gr.Button("Describe Testing Instructions")
115
 
116
- # Action on button click
117
  button.click(
118
  generate_testing_instructions,
119
  inputs=[image_upload, context],
 
1
  import gradio as gr
2
  import base64
3
  from PIL import Image
4
+ from groq import Groq
5
+ import os
 
 
 
 
 
 
 
 
 
# Groq API client used for all chat-completion calls below.
# BUG FIX: the original called os.environ.getenv('GROQ_API_KEY'), but
# os.environ is a mapping with no .getenv attribute (getenv lives on the
# os module itself), so the app crashed with AttributeError at import time.
client = Groq(api_key=os.getenv('GROQ_API_KEY'))
 
8
 
 
9
  def encode_images(image_paths):
10
  base64_images = []
11
  for image_path in image_paths:
 
14
  base64_images.append(f"data:image/jpeg;base64,{base64_image}")
15
  return base64_images
16
 
 
17
def resize_images(image_paths, target_size=(224, 224)):
    """Load each image from disk and resize it to a uniform shape.

    Args:
        image_paths: Iterable of filesystem paths to image files.
        target_size: (width, height) tuple every image is resized to.
            Defaults to (224, 224).

    Returns:
        list: The resized PIL Image objects, in input order.
    """
    resized_images = []
    for image_path in image_paths:
        # Use a context manager so the underlying file handle is closed
        # promptly instead of leaking until garbage collection. resize()
        # forces the lazy load, so the returned image is independent of
        # the (now closed) source file.
        with Image.open(image_path) as img:
            resized_images.append(img.resize(target_size))
    return resized_images
24
 
25
  def generate_testing_instructions(images, context):
 
26
  resized_images = resize_images(images)
27
 
 
28
  base64_images = encode_images(images)
29
 
30
+ completion = client.chat.completions.create(
31
+ model="llava-v1.5-7b-4096-preview",
32
+ messages=[
33
  {
34
  "role": "user",
35
  "content": [
 
57
  ]
58
  }
59
  for base64_image in base64_images
60
+ ],
61
+ temperature=0,
62
+ max_tokens=1024,
63
+ top_p=1,
64
+ stream=False,
65
+ stop=None,
66
+ )
67
+ return completion.choices[0].message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  # Create the Gradio interface
70
  with gr.Blocks() as demo:
 
79
 
80
  button = gr.Button("Describe Testing Instructions")
81
 
 
82
  button.click(
83
  generate_testing_instructions,
84
  inputs=[image_upload, context],