Spaces:

muqtasid87
/

autolabeling_demo

Sleeping

App Files Files Community

muqtasid87 committed 15 days ago

Commit

067fa6f

verified ·

1 Parent(s): 5a65800

si senor

Browse files

Files changed (18) hide show

images/bike.jpg +0 -0
images/bus.jpg +0 -0
images/car.jpg +0 -0
images/pickup.jpg +0 -0
images/truck.jpg +0 -0
images/van.jpg +0 -0
project/__pycache__/app_combined.cpython-311.pyc +0 -0
project/__pycache__/app_florence.cpython-311.pyc +0 -0
project/__pycache__/app_qwen.cpython-311.pyc +0 -0
project/app_combined.py +245 -0
project/app_florence.py +223 -0
project/app_master.py +106 -0
project/images/bike.jpg +0 -0
project/images/bus.jpg +0 -0
project/images/car.jpg +0 -0
project/images/pickup.jpg +0 -0
project/images/truck.jpg +0 -0
project/images/van.jpg +0 -0

images/bike.jpg ADDED Viewed

images/bus.jpg ADDED Viewed

images/car.jpg ADDED Viewed

images/pickup.jpg ADDED Viewed

images/truck.jpg ADDED Viewed

images/van.jpg ADDED Viewed

project/__pycache__/app_combined.cpython-311.pyc ADDED Viewed

Binary file (12.9 kB). View file

project/__pycache__/app_florence.cpython-311.pyc ADDED Viewed

Binary file (10.6 kB). View file

project/__pycache__/app_qwen.cpython-311.pyc ADDED Viewed

Binary file (9.27 kB). View file

project/app_combined.py ADDED Viewed

	@@ -0,0 +1,245 @@

+import streamlit as st
+from transformers import (
+    Qwen2VLForConditionalGeneration,
+    AutoModelForCausalLM,
+    AutoProcessor
+)
+import torch
+from PIL import Image
+import time
+import os
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import io
+import numpy as np
+@st.cache_resource
+def load_models():
+    """Load both models and processors"""
+    # Load Qwen model
+    qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
+        "Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
+        torch_dtype=torch.bfloat16,
+        device_map="auto"
+    ).eval()
+    qwen_processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4")
+    # Load Florence model
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    florence_model = AutoModelForCausalLM.from_pretrained(
+        "microsoft/Florence-2-large-ft",
+        torch_dtype=torch_dtype,
+        trust_remote_code=True
+    ).to(device)
+    florence_processor = AutoProcessor.from_pretrained(
+        "microsoft/Florence-2-large-ft",
+        trust_remote_code=True
+    )
+    return qwen_model, qwen_processor, florence_model, florence_processor, device, torch_dtype
+def process_qwen(image, prompt, model, processor):
+    """Process image with Qwen2-VL"""
+    start_time = time.time()
+    conversation = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image"},
+                {"type": "text", "text": prompt},
+            ],
+        },
+    ]
+    text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+    inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt").to("cuda")
+    output_ids = model.generate(**inputs, max_new_tokens=100)
+    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, output_ids)]
+    output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+    inference_time = time.time() - start_time
+    return output_text[0].strip(), inference_time
+def draw_bounding_boxes(image, bboxes, labels):
+    """Draw bounding boxes and labels on the image"""
+    img_array = np.array(image)
+    fig, ax = plt.subplots()
+    ax.imshow(img_array)
+    for bbox, label in zip(bboxes, labels):
+        x, y, x2, y2 = bbox
+        width = x2 - x
+        height = y2 - y
+        rect = patches.Rectangle(
+            (x, y), width, height,
+            linewidth=2,
+            edgecolor='red',
+            facecolor='none'
+        )
+        ax.add_patch(rect)
+        plt.text(
+            x, y-5,
+            label,
+            color='red',
+            fontsize=12,
+            bbox=dict(facecolor='white', alpha=0.8, edgecolor='none', pad=0)
+        )
+    plt.axis('off')
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
+    plt.close()
+    buf.seek(0)
+    return Image.open(buf)
+def process_florence(image, text_input, model, processor, device, torch_dtype):
+    """Process image with Florence-2"""
+    start_time = time.time()
+    task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
+    prompt = task_prompt + text_input if text_input else task_prompt
+    inputs = processor(
+        text=prompt,
+        images=image,
+        return_tensors="pt"
+    ).to(device, torch_dtype)
+    generated_ids = model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=2048,
+        num_beams=3
+    )
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    parsed_answer = processor.post_process_generation(
+        generated_text,
+        task=task_prompt,
+        image_size=(image.width, image.height)
+    )
+    inference_time = time.time() - start_time
+    result = parsed_answer[task_prompt]
+    annotated_image = draw_bounding_boxes(
+        image,
+        result['bboxes'],
+        result['labels']
+    )
+    return result, inference_time, annotated_image
+def main():
+    st.markdown("<h1 style='font-size: 24px;'>🚗 Vehicle Analysis Pipeline</h1>", unsafe_allow_html=True)
+    # Load models
+    with st.spinner("Loading models... This might take a minute."):
+        qwen_model, qwen_processor, florence_model, florence_processor, device, torch_dtype = load_models()
+    # Initialize session state
+    if 'selected_image' not in st.session_state:
+        st.session_state.selected_image = None
+    if 'qwen_result' not in st.session_state:
+        st.session_state.qwen_result = None
+    if 'florence_result' not in st.session_state:
+        st.session_state.florence_result = None
+    if 'annotated_image' not in st.session_state:
+        st.session_state.annotated_image = None
+    # Image selection
+    col1, col2 = st.columns([1, 2])
+    with col1:
+        input_option = st.radio("Choose input method:", ["Use example image", "Upload image"], label_visibility="collapsed")
+        if input_option == "Upload image":
+            uploaded_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"], label_visibility="collapsed")
+            image_source = uploaded_file
+            if uploaded_file:
+                st.session_state.selected_image = uploaded_file
+        else:
+            image_source = st.session_state.selected_image
+        # Default prompt for Qwen
+        default_prompt = "What type of vehicle is this? Choose only from: car, pickup, bus, truck, motorbike, van. Answer only in one word."
+        prompt = st.text_area("Enter prompt for classification:", value=default_prompt, height=100)
+        analyze_button = st.button("Analyze Image", use_container_width=True, disabled=image_source is None)
+    # Display and process
+    if image_source:
+        try:
+            if isinstance(image_source, str):
+                image = Image.open(image_source).convert("RGB")
+            else:
+                image = Image.open(image_source).convert("RGB")
+            with col2:
+                st.image(image, caption="Selected Image", width=300)
+            if analyze_button:
+                # Step 1: Qwen Analysis
+                with st.spinner("Step 1: Classifying vehicle type..."):
+                    qwen_result, qwen_time = process_qwen(image, prompt, qwen_model, qwen_processor)
+                    st.session_state.qwen_result = qwen_result
+                # Step 2: Florence Analysis
+                with st.spinner("Step 2: Detecting vehicle location..."):
+                    florence_result, florence_time, annotated_image = process_florence(
+                        image,
+                        f"Find the {qwen_result} in the image",
+                        florence_model,
+                        florence_processor,
+                        device,
+                        torch_dtype
+                    )
+                    st.session_state.florence_result = florence_result
+                    st.session_state.annotated_image = annotated_image
+                # Display results
+                st.markdown("### Analysis Results")
+                # Qwen results
+                st.markdown("#### Step 1: Vehicle Classification")
+                st.markdown(f"**Type:** {st.session_state.qwen_result}")
+                st.markdown(f"*Classification time: {qwen_time:.2f} seconds*")
+                # Florence results
+                st.markdown("#### Step 2: Vehicle Detection")
+                st.image(annotated_image, caption="Vehicle Detection Result", use_container_width=True)
+                st.markdown(f"*Detection time: {florence_time:.2f} seconds*")
+                st.markdown("**Raw Detection Data:**")
+                st.json(florence_result)
+        except Exception as e:
+            st.error(f"Error processing image: {str(e)}")
+    # Example images section
+    if input_option == "Use example image":
+        st.markdown("### Example Images")
+        example_images = [f for f in os.listdir("images") if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
+        if example_images:
+            cols = st.columns(4)
+            for idx, img_name in enumerate(example_images):
+                with cols[idx % 4]:
+                    img_path = os.path.join("images", img_name)
+                    img = Image.open(img_path)
+                    img.thumbnail((150, 150))
+                    if st.button("📷", key=f"img_{idx}", help=img_name, use_container_width=True):
+                        st.session_state.selected_image = img_path
+                        st.rerun()
+                    st.image(img, caption=img_name, use_container_width=True)
+        else:
+            st.error("No example images found in the 'images' directory")
+if __name__ == "__main__":
+    main()

project/app_florence.py ADDED Viewed

	@@ -0,0 +1,223 @@

+import streamlit as st
+from transformers import (
+    AutoModelForCausalLM,
+    AutoProcessor
+)
+import torch
+from PIL import Image
+import time
+import os
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import io
+import numpy as np
+@st.cache_resource
+def load_model():
+    """Load the model and processor (cached to prevent reloading)"""
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    model = AutoModelForCausalLM.from_pretrained(
+        "microsoft/Florence-2-large-ft",
+        torch_dtype=torch_dtype,
+        trust_remote_code=True
+    ).to(device)
+    processor = AutoProcessor.from_pretrained(
+        "microsoft/Florence-2-large-ft",
+        trust_remote_code=True
+    )
+    return model, processor, device, torch_dtype
+def draw_bounding_boxes(image, bboxes, labels):
+    """Draw bounding boxes and labels on the image"""
+    # Convert PIL image to numpy array
+    img_array = np.array(image)
+    # Create figure and axis
+    fig, ax = plt.subplots()
+    ax.imshow(img_array)
+    # Add each bounding box and label
+    for bbox, label in zip(bboxes, labels):
+        x, y, x2, y2 = bbox
+        width = x2 - x
+        height = y2 - y
+        # Create rectangle patch
+        rect = patches.Rectangle(
+            (x, y), width, height,
+            linewidth=2,
+            edgecolor='red',
+            facecolor='none'
+        )
+        ax.add_patch(rect)
+        # Add label above the box
+        plt.text(
+            x, y-5,
+            label,
+            color='red',
+            fontsize=12,
+            bbox=dict(facecolor='white', alpha=0.8, edgecolor='none', pad=0)
+        )
+    # Remove axes
+    plt.axis('off')
+    # Convert plot to image
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
+    plt.close()
+    buf.seek(0)
+    return Image.open(buf)
+def process_image(image, text_input, model, processor, device, torch_dtype):
+    """Process the image and return the model's output"""
+    start_time = time.time()
+    task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
+    prompt = task_prompt + text_input if text_input else task_prompt
+    inputs = processor(
+        text=prompt,
+        images=image,
+        return_tensors="pt"
+    ).to(device, torch_dtype)
+    generated_ids = model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=2048,
+        num_beams=3
+    )
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    parsed_answer = processor.post_process_generation(
+        generated_text,
+        task=task_prompt,
+        image_size=(image.width, image.height)
+    )
+    inference_time = time.time() - start_time
+    # Create annotated image
+    result = parsed_answer[task_prompt]
+    annotated_image = draw_bounding_boxes(
+        image,
+        result['bboxes'],
+        result['labels']
+    )
+    return result, inference_time, annotated_image
+def main():
+    # Compact header
+    st.markdown("<h1 style='font-size: 24px;'>🔍 Image Analysis with Florence-2</h1>", unsafe_allow_html=True)
+    # Load model and processor
+    with st.spinner("Loading model... This might take a minute."):
+        model, processor, device, torch_dtype = load_model()
+    # Initialize session state
+    if 'selected_image' not in st.session_state:
+        st.session_state.selected_image = None
+    if 'result' not in st.session_state:
+        st.session_state.result = None
+    if 'inference_time' not in st.session_state:
+        st.session_state.inference_time = None
+    if 'annotated_image' not in st.session_state:
+        st.session_state.annotated_image = None
+    # Main content area
+    col1, col2, col3 = st.columns([1, 1.5, 1])
+    with col1:
+        # Input method selection
+        input_option = st.radio("Choose input method:", ["Use example image", "Upload image"], label_visibility="collapsed")
+        if input_option == "Upload image":
+            uploaded_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"], label_visibility="collapsed")
+            image_source = uploaded_file
+            if uploaded_file:
+                st.session_state.selected_image = uploaded_file
+        else:
+            image_source = st.session_state.selected_image
+    # Default prompt and analysis section
+    default_prompt = "What type of vehicle is this?"
+    prompt = st.text_area("Enter prompt:", value=default_prompt, height=100)
+    analyze_col1, analyze_col2 = st.columns([1, 2])
+    with analyze_col1:
+        analyze_button = st.button("Analyze Image", use_container_width=True, disabled=image_source is None)
+    # Display selected image and results
+    if image_source:
+        try:
+            if isinstance(image_source, str):
+                image = Image.open(image_source).convert("RGB")
+            else:
+                image = Image.open(image_source).convert("RGB")
+            st.image(image, caption="Selected Image", width=300)
+        except Exception as e:
+            st.error(f"Error loading image: {str(e)}")
+    # Analysis results
+    if analyze_button and image_source:
+        with st.spinner("Analyzing..."):
+            try:
+                result, inference_time, annotated_image = process_image(image, prompt, model, processor, device, torch_dtype)
+                st.session_state.result = result
+                st.session_state.inference_time = inference_time
+                st.session_state.annotated_image = annotated_image
+            except Exception as e:
+                st.error(f"Error: {str(e)}")
+    if st.session_state.result:
+        st.success("Analysis Complete!")
+        # Display the annotated image
+        st.image(st.session_state.annotated_image, caption="Analyzed Image with Detections", use_container_width=True)
+        # Display raw results and inference time
+        st.markdown("**Raw Results:**")
+        st.json(st.session_state.result)
+        st.markdown(f"*Inference time: {st.session_state.inference_time:.2f} seconds*")
+    # Example images section
+    if input_option == "Use example image":
+        st.markdown("### Example Images")
+        example_images = [f for f in os.listdir("images") if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
+        if example_images:
+            # Create grid of images
+            cols = st.columns(4)  # Adjust number of columns as needed
+            for idx, img_name in enumerate(example_images):
+                with cols[idx % 4]:
+                    img_path = os.path.join("images", img_name)
+                    img = Image.open(img_path)
+                    img.thumbnail((150, 150))
+                    # Make image clickable
+                    if st.button(
+                        "📷",
+                        key=f"img_{idx}",
+                        help=img_name,
+                        use_container_width=True
+                    ):
+                        st.session_state.selected_image = img_path
+                        st.rerun()
+                    # Display image with conditional styling
+                    st.image(
+                        img,
+                        caption=img_name,
+                        use_container_width=True,
+                    )
+        else:
+            st.error("No example images found in the 'images' directory")
+if __name__ == "__main__":
+    main()

project/app_master.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import streamlit as st
+import app_qwen
+import project.app_florence as app_florence
+import project.app_combined as app_combined
+# Page-level settings: tab title and icon, wide layout, sidebar state
+st.set_page_config(
+    page_title="Vehicle Analysis Suite",
+    page_icon="🚗",
+    layout="wide",
+    initial_sidebar_state="expanded"  # Sidebar starts expanded
+)
+# Inject custom CSS: container/element spacing, full-width buttons, sidebar padding, and the .app-header banner that main() renders above each mode
+st.markdown("""
+    <style>
+        .block-container {padding-top: 1rem; padding-bottom: 0rem;}
+        .element-container {margin-bottom: 0.5rem;}
+        .stButton button {width: 100%;}
+        h1 {margin-bottom: 1rem;}
+        .sidebar-content {
+            padding: 1rem;
+        }
+        .app-header {
+            text-align: center;
+            padding: 1rem;
+            background-color: #f0f2f6;
+            border-radius: 0.5rem;
+            margin-bottom: 2rem;
+        }
+    </style>
+""", unsafe_allow_html=True)
+def main():
+    # Sidebar for app selection
+    with st.sidebar:
+        st.markdown("### 🚗 Vehicle Analysis Suite")
+        st.markdown("---")
+        app_mode = st.radio(
+            "Select Analysis Mode:",
+            ["Qwen2-VL Classifier", "Florence-2 Detector", "Combined Pipeline"],
+            index=0,  # Default to Qwen2-VL
+            key="app_selection"
+        )
+        st.markdown("---")
+        st.markdown("""
+        ### About the Models:
+        **Qwen2-VL Classifier**
+        - Quick vehicle classification
+        - Single-word output
+        - Optimized for vehicle types
+        **Florence-2 Detector**
+        - Visual object detection
+        - Bounding box visualization
+        - Detailed spatial analysis
+        **Combined Pipeline**
+        - Two-stage analysis
+        - Classification + Detection
+        - Comprehensive results
+        """)
+    # Clear previous app states when switching
+    if 'last_app' not in st.session_state:
+        st.session_state.last_app = None
+    if st.session_state.last_app != app_mode:
+        # Clear relevant session state variables
+        for key in list(st.session_state.keys()):
+            if key not in ['app_selection', 'last_app']:
+                del st.session_state[key]
+        st.session_state.last_app = app_mode
+    # Main content area
+    if app_mode == "Qwen2-VL Classifier":
+        st.markdown("""
+            <div class='app-header'>
+                <h1>🤖 Qwen2-VL Vehicle Classifier</h1>
+                <p>Specialized in quick and accurate vehicle type classification</p>
+            </div>
+        """, unsafe_allow_html=True)
+        app_qwen.main()
+    elif app_mode == "Florence-2 Detector":
+        st.markdown("""
+            <div class='app-header'>
+                <h1>🔍 Florence-2 Vehicle Detector</h1>
+                <p>Advanced visual detection with bounding box visualization</p>
+            </div>
+        """, unsafe_allow_html=True)
+        app_florence.main()
+    else:  # Combined Pipeline
+        st.markdown("""
+            <div class='app-header'>
+                <h1>🚀 Combined Analysis Pipeline</h1>
+                <p>Comprehensive vehicle analysis using both models</p>
+            </div>
+        """, unsafe_allow_html=True)
+        app_combined.main()
+if __name__ == "__main__":
+    main()

project/images/bike.jpg ADDED Viewed

project/images/bus.jpg ADDED Viewed

project/images/car.jpg ADDED Viewed

project/images/pickup.jpg ADDED Viewed

project/images/truck.jpg ADDED Viewed

project/images/van.jpg ADDED Viewed