from typing import Optional

import gradio as gr
import numpy as np
import supervision as sv
import torch
from PIL import Image
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator

from utils.models import load_models, CHECKPOINT_NAMES

MARKDOWN = """
# Segment Anything Model 2 🔥
<div>
<a href="https://github.com/facebookresearch/segment-anything-2">
<img src="https://badges.aleen42.com/src/github.svg" alt="GitHub" style="display:inline-block;">
</a>
<a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-segment-images-with-sam-2.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab" style="display:inline-block;">
</a>
<a href="https://blog.roboflow.com/what-is-segment-anything-2/">
<img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="Roboflow" style="display:inline-block;">
</a>
<a href="https://www.youtube.com/watch?v=Dv003fTyO-Y">
<img src="https://badges.aleen42.com/src/youtube.svg" alt="YouTube" style="display:inline-block;">
</a>
</div>
Segment Anything Model 2 (SAM 2) is a foundation model designed to solve promptable
visual segmentation in both images and videos. It unifies the two settings by
treating an image as a single-frame video. Its design, a simple transformer
architecture with streaming memory, enables real-time video processing. A
model-in-the-loop data engine, which improves the model and the data through user
interaction, was built to collect SA-V, the largest video segmentation dataset to
date. Trained on this extensive dataset, SAM 2 delivers robust performance across
diverse tasks and visual domains.
"""

# Each example row follows the `inputs` order used below:
# [checkpoint name, image, points_per_side].
EXAMPLES = [
    ["tiny", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg", 16],
    ["small", "https://media.roboflow.com/notebooks/examples/dog-3.jpeg", 16],
    ["large", "https://media.roboflow.com/notebooks/examples/dog-3.jpeg", 16],
    ["large", "https://media.roboflow.com/notebooks/examples/dog-3.jpeg", 64],
]

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MASK_ANNOTATOR = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
# `load_models` returns a mapping from checkpoint name to a loaded SAM2 model,
# keyed by the entries of CHECKPOINT_NAMES.
MODELS = load_models(device=DEVICE)

def process(checkpoint_dropdown, image_input, points_per_side) -> Optional[Image.Image]:
    if image_input is None:
        return None
    model = MODELS[checkpoint_dropdown]
    mask_generator = SAM2AutomaticMaskGenerator(
        model=model,
        points_per_side=points_per_side)
    # The mask generator expects an RGB numpy array.
    image = np.array(image_input.convert("RGB"))
    sam_result = mask_generator.generate(image)
    detections = sv.Detections.from_sam(sam_result=sam_result)
    # Draw one color-indexed mask per detection on top of the input image.
    return MASK_ANNOTATOR.annotate(scene=image_input, detections=detections)
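
# A minimal standalone sketch of the same pipeline outside Gradio ("dog.jpeg"
# is a hypothetical local file; each entry of `sam_result` is a dict with keys
# such as "segmentation", "area", and "bbox"):
#
#     generator = SAM2AutomaticMaskGenerator(model=MODELS["tiny"], points_per_side=32)
#     image = Image.open("dog.jpeg")
#     sam_result = generator.generate(np.array(image.convert("RGB")))
#     annotated = MASK_ANNOTATOR.annotate(
#         scene=image, detections=sv.Detections.from_sam(sam_result=sam_result))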

with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        checkpoint_dropdown_component = gr.Dropdown(
            choices=CHECKPOINT_NAMES,
            value=CHECKPOINT_NAMES[0],
            label="Checkpoint",
            info="Select a SAM2 checkpoint to use.",
            interactive=True
        )
        points_per_side_component = gr.Slider(
            minimum=16,
            maximum=64,
            value=16,
            step=16,
            label="Points per side",
            info="The number of points sampled along each side of the image."
        )
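        # points_per_side=N prompts the generator with an N x N grid, so the
        # slider's 16..64 range corresponds to 256..4096 points per image.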
    with gr.Row():
        with gr.Column():
            image_input_component = gr.Image(type='pil', label='Upload image')
            submit_button_component = gr.Button(value='Submit', variant='primary')
        with gr.Column():
            image_output_component = gr.Image(type='pil', label='Image Output')
    with gr.Row():
        gr.Examples(
            fn=process,
            examples=EXAMPLES,
            inputs=[
                checkpoint_dropdown_component,
                image_input_component,
                points_per_side_component
            ],
            outputs=[image_output_component],
            run_on_click=True
        )

    submit_button_component.click(
        fn=process,
        inputs=[
            checkpoint_dropdown_component,
            image_input_component,
            points_per_side_component
        ],
        outputs=[image_output_component]
    )

# max_threads=1 keeps request handling on a single worker thread, so GPU
# inference calls never run concurrently.
demo.launch(debug=False, show_error=True, max_threads=1)