from typing import Optional

import gradio as gr
import numpy as np
import supervision as sv
import torch
from PIL import Image
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator

from utils.models import load_models, CHECKPOINT_NAMES

MARKDOWN = """
# Segment Anything Model 2 🔥
<div>
<a href="https://github.com/facebookresearch/segment-anything-2">
<img src="https://badges.aleen42.com/src/github.svg" alt="GitHub" style="display:inline-block;">
</a>
<a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-segment-images-with-sam-2.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab" style="display:inline-block;">
</a>
<a href="https://blog.roboflow.com/what-is-segment-anything-2/">
<img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="Roboflow" style="display:inline-block;">
</a>
<a href="https://www.youtube.com/watch?v=Dv003fTyO-Y">
<img src="https://badges.aleen42.com/src/youtube.svg" alt="YouTube" style="display:inline-block;">
</a>
</div>
Segment Anything Model 2 (SAM 2) is a foundation model designed to solve promptable
visual segmentation in both images and videos. It unifies the two settings by
treating an image as a single-frame video. Its design, a simple transformer
architecture with streaming memory, enables real-time video processing. A
model-in-the-loop data engine, which improves the model and the data through user
interaction, was built to collect SA-V, the largest video segmentation dataset to
date. Trained on this extensive dataset, SAM 2 delivers robust performance across
diverse tasks and visual domains.
"""

# Each example row follows the `inputs` order used below:
# [checkpoint name, image, points_per_side].
EXAMPLES = [
    ["tiny", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg", 16],
    ["small", "https://media.roboflow.com/notebooks/examples/dog-3.jpeg", 16],
    ["large", "https://media.roboflow.com/notebooks/examples/dog-3.jpeg", 16],
    ["large", "https://media.roboflow.com/notebooks/examples/dog-3.jpeg", 64],
]

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MASK_ANNOTATOR = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
# `load_models` returns a mapping from checkpoint name to a loaded SAM2 model,
# keyed by the entries of CHECKPOINT_NAMES.
MODELS = load_models(device=DEVICE)

def process(checkpoint_dropdown, image_input, points_per_side) -> Optional[Image.Image]:
    if image_input is None:
        return None
    model = MODELS[checkpoint_dropdown]
    mask_generator = SAM2AutomaticMaskGenerator(
        model=model,
        points_per_side=points_per_side)
    # The mask generator expects an RGB numpy array.
    image = np.array(image_input.convert("RGB"))
    sam_result = mask_generator.generate(image)
    detections = sv.Detections.from_sam(sam_result=sam_result)
    # Draw one color-indexed mask per detection on top of the input image.
    return MASK_ANNOTATOR.annotate(scene=image_input, detections=detections)
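
# A minimal standalone sketch of the same pipeline outside Gradio ("dog.jpeg"
# is a hypothetical local file; each entry of `sam_result` is a dict with keys
# such as "segmentation", "area", and "bbox"):
#
#     generator = SAM2AutomaticMaskGenerator(model=MODELS["tiny"], points_per_side=32)
#     image = Image.open("dog.jpeg")
#     sam_result = generator.generate(np.array(image.convert("RGB")))
#     annotated = MASK_ANNOTATOR.annotate(
#         scene=image, detections=sv.Detections.from_sam(sam_result=sam_result))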

with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        checkpoint_dropdown_component = gr.Dropdown(
            choices=CHECKPOINT_NAMES,
            value=CHECKPOINT_NAMES[0],
            label="Checkpoint",
            info="Select a SAM2 checkpoint to use.",
            interactive=True
        )
        points_per_side_component = gr.Slider(
            minimum=16,
            maximum=64,
            value=16,
            step=16,
            label="Points per side",
            info="The number of points sampled along each side of the image."
        )
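        # points_per_side=N prompts the generator with an N x N grid, so the
        # slider's 16..64 range corresponds to 256..4096 points per image.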
    with gr.Row():
        with gr.Column():
            image_input_component = gr.Image(type='pil', label='Upload image')
            submit_button_component = gr.Button(value='Submit', variant='primary')
        with gr.Column():
            image_output_component = gr.Image(type='pil', label='Image Output')
    with gr.Row():
        gr.Examples(
            fn=process,
            examples=EXAMPLES,
            inputs=[
                checkpoint_dropdown_component,
                image_input_component,
                points_per_side_component
            ],
            outputs=[image_output_component],
            run_on_click=True
        )

    submit_button_component.click(
        fn=process,
        inputs=[
            checkpoint_dropdown_component,
            image_input_component,
            points_per_side_component
        ],
        outputs=[image_output_component]
    )

# max_threads=1 keeps request handling on a single worker thread, so GPU
# inference calls never run concurrently.
demo.launch(debug=False, show_error=True, max_threads=1)