muqtasid87 committed
Commit 5a65800 · verified · 1 Parent(s): c594eb4
Files changed (4)
  1. app_master.py +106 -0
  2. app_qwen.py +147 -0
  3. florence.py +51 -0
  4. requirements.txt +15 -0
app_master.py ADDED
@@ -0,0 +1,106 @@
+ import streamlit as st
+ import app_qwen
+ import project.app_florence as app_florence
+ import project.app_combined as app_combined
+
+ # Set page configuration
+ st.set_page_config(
+     page_title="Vehicle Analysis Suite",
+     page_icon="🚗",
+     layout="wide",
+     initial_sidebar_state="expanded"  # Show sidebar by default
+ )
+
+ # Custom CSS for the sidebar and main content
+ st.markdown("""
+ <style>
+     .block-container {padding-top: 1rem; padding-bottom: 0rem;}
+     .element-container {margin-bottom: 0.5rem;}
+     .stButton button {width: 100%;}
+     h1 {margin-bottom: 1rem;}
+     .sidebar-content {
+         padding: 1rem;
+     }
+     .app-header {
+         text-align: center;
+         padding: 1rem;
+         background-color: #f0f2f6;
+         border-radius: 0.5rem;
+         margin-bottom: 2rem;
+     }
+ </style>
+ """, unsafe_allow_html=True)
+
+ def main():
+     # Sidebar for app selection
+     with st.sidebar:
+         st.markdown("### 🚗 Vehicle Analysis Suite")
+         st.markdown("---")
+         app_mode = st.radio(
+             "Select Analysis Mode:",
+             ["Qwen2-VL Classifier", "Florence-2 Detector", "Combined Pipeline"],
+             index=0,  # Default to Qwen2-VL
+             key="app_selection"
+         )
+
+         st.markdown("---")
+         st.markdown("""
+         ### About the Models:
+
+         **Qwen2-VL Classifier**
+         - Quick vehicle classification
+         - Single-word output
+         - Optimized for vehicle types
+
+         **Florence-2 Detector**
+         - Visual object detection
+         - Bounding box visualization
+         - Detailed spatial analysis
+
+         **Combined Pipeline**
+         - Two-stage analysis
+         - Classification + Detection
+         - Comprehensive results
+         """)
+
+     # Clear previous app states when switching
+     if 'last_app' not in st.session_state:
+         st.session_state.last_app = None
+
+     if st.session_state.last_app != app_mode:
+         # Clear relevant session state variables
+         for key in list(st.session_state.keys()):
+             if key not in ['app_selection', 'last_app']:
+                 del st.session_state[key]
+         st.session_state.last_app = app_mode
+
+     # Main content area
+     if app_mode == "Qwen2-VL Classifier":
+         st.markdown("""
+         <div class='app-header'>
+             <h1>🤖 Qwen2-VL Vehicle Classifier</h1>
+             <p>Specialized in quick and accurate vehicle type classification</p>
+         </div>
+         """, unsafe_allow_html=True)
+         app_qwen.main()
+
+     elif app_mode == "Florence-2 Detector":
+         st.markdown("""
+         <div class='app-header'>
+             <h1>🔍 Florence-2 Vehicle Detector</h1>
+             <p>Advanced visual detection with bounding box visualization</p>
+         </div>
+         """, unsafe_allow_html=True)
+         app_florence.main()
+
+     else:  # Combined Pipeline
+         st.markdown("""
+         <div class='app-header'>
+             <h1>🚀 Combined Analysis Pipeline</h1>
+             <p>Comprehensive vehicle analysis using both models</p>
+         </div>
+         """, unsafe_allow_html=True)
+         app_combined.main()
+
+ if __name__ == "__main__":
+     main()
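
Editor's note: app_master.py only assumes that each page module exposes a top-level main() that renders its own Streamlit UI. app_qwen.main() is defined in this commit; project.app_florence and project.app_combined are expected to follow the same contract. A minimal sketch of that contract (hypothetical stub, not part of this commit):

import streamlit as st

# Any module selectable from the sidebar needs only a main() entry point;
# app_master.py calls it after clearing stale session state on a mode switch.
def main():
    st.write("Page content rendered here when selected in the sidebar.")

The suite is then launched with: streamlit run app_master.py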
app_qwen.py ADDED
@@ -0,0 +1,147 @@
+ import streamlit as st
+ from transformers import (
+     Qwen2VLForConditionalGeneration,
+     AutoProcessor
+ )
+ import torch
+ from PIL import Image
+ import time
+ import os
+
+
+ @st.cache_resource
+ def load_model():
+     """Load the model and processor (cached to prevent reloading)"""
+     model = Qwen2VLForConditionalGeneration.from_pretrained(
+         "Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
+         torch_dtype=torch.bfloat16,
+         device_map="auto"
+     ).eval()
+     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4")
+     return model, processor
+
+ def process_image(image, prompt, model, processor):
+     """Process the image and return the model's output"""
+     start_time = time.time()
+
+     conversation = [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "image"},
+                 {"type": "text", "text": prompt},
+             ],
+         },
+     ]
+
+     text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+     # Send inputs to wherever device_map="auto" placed the model instead of
+     # hard-coding "cuda", so the app does not crash on CPU-only hosts.
+     inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt").to(model.device)
+
+     output_ids = model.generate(**inputs, max_new_tokens=100)
+     # Trim the echoed prompt tokens so only newly generated tokens are decoded
+     generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, output_ids)]
+     output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+
+     inference_time = time.time() - start_time
+     return output_text[0].strip(), inference_time
+
+ def main():
+     # Compact header
+     st.markdown("<h1 style='font-size: 24px;'>🔍 Image Analysis with Qwen2-VL</h1>", unsafe_allow_html=True)
+
+     # Load model and processor
+     with st.spinner("Loading model... This might take a minute."):
+         model, processor = load_model()
+
+     # Initialize session state
+     if 'selected_image' not in st.session_state:
+         st.session_state.selected_image = None
+     if 'result' not in st.session_state:
+         st.session_state.result = None
+     if 'inference_time' not in st.session_state:
+         st.session_state.inference_time = None
+
+     # Main content area
+     col1, col2, col3 = st.columns([1, 1.5, 1])
+
+     with col1:
+         # Input method selection
+         input_option = st.radio("Choose input method:", ["Use example image", "Upload image"], label_visibility="collapsed")
+
+         if input_option == "Upload image":
+             uploaded_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"], label_visibility="collapsed")
+             image_source = uploaded_file
+             if uploaded_file:
+                 st.session_state.selected_image = uploaded_file
+         else:
+             image_source = st.session_state.selected_image
+
+         # Default prompt and analysis section
+         default_prompt = "What type of vehicle is this? Choose only from: car, pickup, bus, truck, motorbike, van. Answer only in one word."
+         prompt = st.text_area("Enter prompt:", value=default_prompt, height=100)
+
+         analyze_col1, analyze_col2 = st.columns([1, 2])
+         with analyze_col1:
+             analyze_button = st.button("Analyze Image", use_container_width=True, disabled=image_source is None)
+
+         # Display selected image and results
+         if image_source:
+             try:
+                 # Image.open accepts both file paths (example images) and
+                 # uploaded file objects, so no isinstance branching is needed.
+                 image = Image.open(image_source).convert("RGB")
+                 st.image(image, caption="Selected Image", width=300)
+             except Exception as e:
+                 st.error(f"Error loading image: {str(e)}")
+
+         # Analysis results
+         if analyze_button and image_source:
+             with st.spinner("Analyzing..."):
+                 try:
+                     result, inference_time = process_image(image, prompt, model, processor)
+                     st.session_state.result = result
+                     st.session_state.inference_time = inference_time
+                 except Exception as e:
+                     st.error(f"Error: {str(e)}")
+
+         if st.session_state.result:
+             st.success("Analysis Complete!")
+             st.markdown(f"**Result:**\n{st.session_state.result}")
+             st.markdown(f"*Inference time: {st.session_state.inference_time:.2f} seconds*")
+
+     # Example images section
+     if input_option == "Use example image":
+         st.markdown("### Example Images")
+         example_images = [f for f in os.listdir("images") if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
+
+         if example_images:
+             # Create grid of images
+             cols = st.columns(4)  # Adjust number of columns as needed
+             for idx, img_name in enumerate(example_images):
+                 with cols[idx % 4]:
+                     img_path = os.path.join("images", img_name)
+                     img = Image.open(img_path)
+                     img.thumbnail((150, 150))
+
+                     # Make image clickable
+                     if st.button(
+                         "📷",
+                         key=f"img_{idx}",
+                         help=img_name,
+                         use_container_width=True
+                     ):
+                         st.session_state.selected_image = img_path
+                         st.rerun()
+
+                     # Display image with conditional styling
+                     st.image(
+                         img,
+                         caption=img_name,
+                         use_container_width=True,
+                     )
+         else:
+             st.error("No example images found in the 'images' directory")
+
+ if __name__ == "__main__":
+     main()
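
Editor's note: the helpers above are importable outside Streamlit as well. A hedged sketch of batch-classifying the same images/ folder the app reads (assumption: st.cache_resource simply executes the wrapped function when no Streamlit runtime is active, though it may log a warning):

import os
from PIL import Image
import app_qwen

# Load once, then reuse for every image in the folder.
model, processor = app_qwen.load_model()
prompt = "What type of vehicle is this? Choose only from: car, pickup, bus, truck, motorbike, van. Answer only in one word."

for name in sorted(os.listdir("images")):
    if name.lower().endswith((".jpg", ".jpeg", ".png")):
        img = Image.open(os.path.join("images", name)).convert("RGB")
        label, dt = app_qwen.process_image(img, prompt, model, processor)
        print(f"{name}: {label} ({dt:.2f}s)")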
florence.py ADDED
@@ -0,0 +1,51 @@
+ from transformers import AutoModelForCausalLM, AutoProcessor
+ import torch
+ from PIL import Image
+
+
+ # Load model and processor
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+ model_florence = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large-ft", torch_dtype=torch_dtype, trust_remote_code=True).to(device)
+ processor_florence = AutoProcessor.from_pretrained("microsoft/Florence-2-large-ft", trust_remote_code=True)
+
+ # BBox detection using Florence
+ # Inference function: takes an image path and an optional grounding phrase
+ def grounding(image_path, text_input=None, task_prompt="<CAPTION_TO_PHRASE_GROUNDING>"):
+     with open(image_path, "rb") as f:
+         image = Image.open(f).convert("RGB")
+
+     if text_input is None:
+         prompt = task_prompt
+     else:
+         prompt = task_prompt + text_input
+
+     inputs = processor_florence(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
+     generated_ids = model_florence.generate(
+         input_ids=inputs["input_ids"],
+         pixel_values=inputs["pixel_values"],
+         max_new_tokens=2048,
+         num_beams=3
+     )
+     generated_text = processor_florence.batch_decode(generated_ids, skip_special_tokens=False)[0]
+
+     # Map the raw token string back to structured boxes/labels for this task
+     parsed_answer = processor_florence.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
+
+     return parsed_answer[task_prompt]
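
Editor's note: for <CAPTION_TO_PHRASE_GROUNDING>, post_process_generation returns a dict with "bboxes" (pixel-space [x1, y1, x2, y2] lists) and "labels", so grounding() hands back exactly that pair. A hedged usage sketch ("car.jpg" and the phrase are placeholders) drawing the boxes with matplotlib:

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

result = grounding("car.jpg", text_input="a car")

image = Image.open("car.jpg").convert("RGB")
fig, ax = plt.subplots()
ax.imshow(image)
for (x1, y1, x2, y2), label in zip(result["bboxes"], result["labels"]):
    # Rectangle takes the top-left corner plus width/height
    ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                   fill=False, edgecolor="red", linewidth=2))
    ax.text(x1, y1, label, color="red", backgroundcolor="white")
ax.axis("off")
plt.show()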
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ --find-links https://download.pytorch.org/whl/torch_stable.html
+ torch==2.2.0+cpu
+ Pillow
+ transformers
+ timm
+ auto-gptq
+ huggingface-hub
+ bitsandbytes
+ opencv-python
+ streamlit
+ numpy
+ pandas
+ matplotlib
+ gradio
+
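
Editor's note: this pins the CPU-only torch wheel (torch==2.2.0+cpu), while app_qwen.py loads a GPTQ Int4 checkpoint via device_map="auto" and florence.py prefers cuda:0 when available. On a GPU host, swap in a CUDA build of torch; the GPTQ-quantized Qwen2-VL weights generally need auto-gptq's CUDA kernels to run at all.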