Spaces:

muqtasid87
/

autolabeling_demo

Running

App Files Files Community

muqtasid87 committed on 5 days ago

Commit

5a65800

verified ·

1 Parent(s): c594eb4

yea man

Browse files

Files changed (4) hide show

app_master.py +106 -0
app_qwen.py +147 -0
florence.py +51 -0
requirements.txt +15 -0

app_master.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import streamlit as st
+import app_qwen
+import project.app_florence as app_florence
+import project.app_combined as app_combined
+# Set page configuration: browser tab title/icon, wide layout, and the
+# sidebar expanded on first load. This runs at import time, before main().
+# NOTE(review): Streamlit has historically required set_page_config to be the
+# first Streamlit command on a page -- confirm for the installed version
+# before moving any st.* call above it.
+st.set_page_config(
+    page_title="Vehicle Analysis Suite",
+    page_icon="🚗",
+    layout="wide",
+    initial_sidebar_state="expanded"  # Show sidebar by default
+)
+# Custom CSS for the sidebar and main content: tightens container padding and
+# element margins, stretches buttons to full width, and defines the
+# .app-header banner class that main() uses for each mode's header.
+# unsafe_allow_html=True is passed so the raw <style> block is emitted as HTML.
+st.markdown("""
+    <style>
+        .block-container {padding-top: 1rem; padding-bottom: 0rem;}
+        .element-container {margin-bottom: 0.5rem;}
+        .stButton button {width: 100%;}
+        h1 {margin-bottom: 1rem;}
+        .sidebar-content {
+            padding: 1rem;
+        }
+        .app-header {
+            text-align: center;
+            padding: 1rem;
+            background-color: #f0f2f6;
+            border-radius: 0.5rem;
+            margin-bottom: 2rem;
+        }
+    </style>
+""", unsafe_allow_html=True)
+def main():
+    """Render the sidebar mode selector and dispatch to the chosen sub-app.
+
+    Draws the sidebar (title, mode radio, model descriptions), resets the
+    session state whenever the selected mode differs from the previous run,
+    then renders the mode's header banner and calls that sub-app's ``main()``.
+    """
+    # Sidebar for app selection
+    with st.sidebar:
+        st.markdown("### 🚗 Vehicle Analysis Suite")
+        st.markdown("---")
+        # The radio's value is also stored in session state under "app_selection".
+        app_mode = st.radio(
+            "Select Analysis Mode:",
+            ["Qwen2-VL Classifier", "Florence-2 Detector", "Combined Pipeline"],
+            index=0,  # Default to Qwen2-VL
+            key="app_selection"
+        )
+        st.markdown("---")
+        st.markdown("""
+        ### About the Models:
+        **Qwen2-VL Classifier**
+        - Quick vehicle classification
+        - Single-word output
+        - Optimized for vehicle types
+        **Florence-2 Detector**
+        - Visual object detection
+        - Bounding box visualization
+        - Detailed spatial analysis
+        **Combined Pipeline**
+        - Two-stage analysis
+        - Classification + Detection
+        - Comprehensive results
+        """)
+    # Clear previous app states when switching
+    if 'last_app' not in st.session_state:
+        st.session_state.last_app = None
+    if st.session_state.last_app != app_mode:
+        # Clear relevant session state variables.
+        # list(...) takes a snapshot of the keys because entries are deleted
+        # inside the loop; the selector's own key and the 'last_app' marker
+        # are the only entries kept.
+        for key in list(st.session_state.keys()):
+            if key not in ['app_selection', 'last_app']:
+                del st.session_state[key]
+        st.session_state.last_app = app_mode
+    # Main content area: each branch draws an .app-header banner and then
+    # hands control to the matching sub-app's main().
+    if app_mode == "Qwen2-VL Classifier":
+        st.markdown("""
+            <div class='app-header'>
+                <h1>🤖 Qwen2-VL Vehicle Classifier</h1>
+                <p>Specialized in quick and accurate vehicle type classification</p>
+            </div>
+        """, unsafe_allow_html=True)
+        app_qwen.main()
+    elif app_mode == "Florence-2 Detector":
+        st.markdown("""
+            <div class='app-header'>
+                <h1>🔍 Florence-2 Vehicle Detector</h1>
+                <p>Advanced visual detection with bounding box visualization</p>
+            </div>
+        """, unsafe_allow_html=True)
+        app_florence.main()
+    else:  # Combined Pipeline
+        st.markdown("""
+            <div class='app-header'>
+                <h1>🚀 Combined Analysis Pipeline</h1>
+                <p>Comprehensive vehicle analysis using both models</p>
+            </div>
+        """, unsafe_allow_html=True)
+        app_combined.main()
+# Run the suite only when this file is the entry script.
+if __name__ == "__main__":
+    main()

app_qwen.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import streamlit as st
+from transformers import (
+    Qwen2VLForConditionalGeneration,
+    AutoProcessor
+)
+import torch
+from PIL import Image
+import time
+import os
+@st.cache_resource
+def load_model():
+    """Build the Qwen2-VL model/processor pair (cached to prevent reloading).
+
+    Returns:
+        tuple: ``(model, processor)``; ``eval()`` has been called on the model.
+    """
+    checkpoint = "Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4"
+    load_kwargs = {"torch_dtype": torch.bfloat16, "device_map": "auto"}
+    qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(checkpoint, **load_kwargs)
+    qwen_model = qwen_model.eval()
+    qwen_processor = AutoProcessor.from_pretrained(checkpoint)
+    return qwen_model, qwen_processor
+def process_image(image, prompt, model, processor):
+    """Run one image + text prompt through the model and time the call.
+
+    Args:
+        image: Image handed to the processor (main() passes an RGB PIL image).
+        prompt: Instruction text sent alongside the image.
+        model: Model exposing ``generate`` and ``device`` (see load_model()).
+        processor: Processor providing ``apply_chat_template``, the
+            tokenizing ``__call__`` and ``batch_decode``.
+
+    Returns:
+        tuple: ``(answer, seconds)`` -- the stripped decoded reply and the
+        wall-clock time spent on preprocessing, generation and decoding.
+    """
+    start_time = time.time()
+    conversation = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image"},
+                {"type": "text", "text": prompt},
+            ],
+        },
+    ]
+    text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+    inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt")
+    # Follow the model's own placement instead of hard-coding "cuda": the model
+    # is loaded with device_map="auto", so on a CPU-only host the former
+    # .to("cuda") raised before generation could start.
+    inputs = inputs.to(model.device)
+    output_ids = model.generate(**inputs, max_new_tokens=100)
+    # The generated sequences start with the prompt tokens; slice them off per
+    # sample so only the newly generated tokens are decoded.
+    generated_ids = [
+        full_ids[len(prompt_ids):]
+        for prompt_ids, full_ids in zip(inputs.input_ids, output_ids)
+    ]
+    output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+    inference_time = time.time() - start_time
+    return output_text[0].strip(), inference_time
+def main():
+    """Render the Qwen2-VL classification UI.
+
+    Loads the cached model, lets the user pick an example image or upload
+    one, runs the prompt when "Analyze Image" is pressed, and shows the
+    result stored in session state together with its inference time.
+    """
+    # Compact header
+    st.markdown("<h1 style='font-size: 24px;'>🔍 Image Analysis with Qwen2-VL</h1>", unsafe_allow_html=True)
+    # Load model and processor
+    with st.spinner("Loading model... This might take a minute."):
+        model, processor = load_model()
+    # Initialize session state
+    for state_key in ("selected_image", "result", "inference_time"):
+        if state_key not in st.session_state:
+            st.session_state[state_key] = None
+    # Main content area
+    col1, col2, col3 = st.columns([1, 1.5, 1])
+    with col1:
+        # Input method selection
+        input_option = st.radio("Choose input method:", ["Use example image", "Upload image"], label_visibility="collapsed")
+        if input_option == "Upload image":
+            uploaded_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"], label_visibility="collapsed")
+            image_source = uploaded_file
+            if uploaded_file:
+                st.session_state.selected_image = uploaded_file
+        else:
+            image_source = st.session_state.selected_image
+    # Default prompt and analysis section
+    default_prompt = "What type of vehicle is this? Choose only from: car, pickup, bus, truck, motorbike, van. Answer only in one word."
+    prompt = st.text_area("Enter prompt:", value=default_prompt, height=100)
+    analyze_col1, analyze_col2 = st.columns([1, 2])
+    with analyze_col1:
+        analyze_button = st.button("Analyze Image", use_container_width=True, disabled=image_source is None)
+    # Display selected image and results.
+    # `image` stays None when nothing is selected or the file cannot be
+    # decoded, so the analysis step below never touches an unbound name.
+    image = None
+    if image_source:
+        try:
+            # image_source is either an example path (str) or an uploaded
+            # file object; both are passed to Image.open the same way.
+            image = Image.open(image_source).convert("RGB")
+            st.image(image, caption="Selected Image", width=300)
+        except Exception as e:
+            st.error(f"Error loading image: {str(e)}")
+    # Analysis results
+    if analyze_button and image is not None:
+        with st.spinner("Analyzing..."):
+            try:
+                result, inference_time = process_image(image, prompt, model, processor)
+                st.session_state.result = result
+                st.session_state.inference_time = inference_time
+            except Exception as e:
+                st.error(f"Error: {str(e)}")
+    if st.session_state.result:
+        st.success("Analysis Complete!")
+        st.markdown(f"**Result:**\n{st.session_state.result}")
+        st.markdown(f"*Inference time: {st.session_state.inference_time:.2f} seconds*")
+    # Example images section
+    if input_option == "Use example image":
+        st.markdown("### Example Images")
+        # A missing "images" directory is treated like an empty one, so the
+        # user gets the error message below instead of an uncaught
+        # FileNotFoundError. Sorting keeps the grid order -- and with it the
+        # img_{idx} button keys -- stable, since os.listdir order is arbitrary.
+        example_images = []
+        if os.path.isdir("images"):
+            example_images = sorted(
+                f for f in os.listdir("images") if f.lower().endswith(('.jpg', '.jpeg', '.png'))
+            )
+        if example_images:
+            # Create grid of images
+            cols = st.columns(4)  # Adjust number of columns as needed
+            for idx, img_name in enumerate(example_images):
+                with cols[idx % 4]:
+                    img_path = os.path.join("images", img_name)
+                    img = Image.open(img_path)
+                    img.thumbnail((150, 150))
+                    # Make image clickable: the button stores the path and
+                    # reruns the script so the selection shows up above.
+                    if st.button(
+                        "📷",
+                        key=f"img_{idx}",
+                        help=img_name,
+                        use_container_width=True
+                    ):
+                        st.session_state.selected_image = img_path
+                        st.rerun()
+                    # Display the thumbnail under its button
+                    st.image(
+                        img,
+                        caption=img_name,
+                        use_container_width=True,
+                    )
+        else:
+            st.error("No example images found in the 'images' directory")
+# Allow running this page on its own as well as through app_master.
+if __name__ == "__main__":
+    main()

florence.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from transformers import (
+    Qwen2VLForConditionalGeneration,
+    AutoTokenizer,
+    AutoProcessor,
+    BitsAndBytesConfig,
+    pipeline,
+    AutoModelForCausalLM)
+from transformers.image_utils import load_image
+import torch
+from PIL import Image
+from tqdm import tqdm
+import os
+import time
+from codecarbon import EmissionsTracker
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import psutil
+import numpy as np
+import requests
+# Load the Florence-2 model and processor once, at import time: importing this
+# module triggers both from_pretrained calls.
+# Device/dtype selection: first CUDA device with float16 when CUDA is
+# available, otherwise CPU with float32.
+# NOTE(review): trust_remote_code=True lets transformers run code shipped in
+# the model repository -- confirm that is acceptable / pin a revision.
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+model_florence = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large-ft", torch_dtype=torch_dtype, trust_remote_code=True).to(device)
+processor_florence = AutoProcessor.from_pretrained("microsoft/Florence-2-large-ft", trust_remote_code=True)
+# Bounding-box detection with Florence
+def grounding(image_path, text_input=None, task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"):
+    """Run a Florence task on an image file and return the parsed result.
+
+    Args:
+        image_path: Path of the image file to open.
+        text_input: Optional text appended directly after ``task_prompt``.
+        task_prompt: Task token sent to the model; also the key looked up in
+            the post-processed output.
+
+    Returns:
+        The ``task_prompt`` entry of the processor's post-processed output.
+    """
+    with open(image_path, "rb") as image_file:
+        rgb_image = Image.open(image_file).convert("RGB")
+    full_prompt = task_prompt if text_input is None else task_prompt + text_input
+    model_inputs = processor_florence(text=full_prompt, images=rgb_image, return_tensors="pt")
+    model_inputs = model_inputs.to(device, torch_dtype)
+    token_ids = model_florence.generate(
+        input_ids=model_inputs["input_ids"],
+        pixel_values=model_inputs["pixel_values"],
+        max_new_tokens=2048,
+        num_beams=3,
+    )
+    # Special tokens are kept in the decoded text that is handed to
+    # post_process_generation.
+    decoded_batch = processor_florence.batch_decode(token_ids, skip_special_tokens=False)
+    raw_text = decoded_batch[0]
+    width_height = (rgb_image.width, rgb_image.height)
+    parsed = processor_florence.post_process_generation(raw_text, task=task_prompt, image_size=width_height)
+    return parsed[task_prompt]

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+--find-links https://download.pytorch.org/whl/torch_stable.html
+torch==2.2.0+cpu
+Pillow
+transformers
+# Needed by transformers when a model is loaded with device_map="auto"
+# (app_qwen.load_model).
+accelerate
+# Needed by transformers, together with auto-gptq, to load GPTQ-quantized
+# checkpoints such as Qwen2-VL-2B-Instruct-GPTQ-Int4.
+optimum
+timm
+auto-gptq
+huggingface-hub
+bitsandbytes
+opencv-python
+streamlit
+numpy
+pandas
+matplotlib
+gradio
+# Imported directly by florence.py.
+codecarbon
+psutil
+requests
+tqdm