SkalskiP committed on
Commit
7761031
1 Parent(s): aabd771

Revert "working on video inference"

This reverts commit aabd7712744df069cda860abd140284cf78b5f6d. It removes the
work-in-progress video path: the `process_video` handler and the video UI
components in app.py, the `utils/video.py` helpers, the `tqdm` dependency,
and the `video segmentation` entry in MODE_NAMES.

Files changed (4):
  1. app.py +17 -133
  2. requirements.txt +0 -1
  3. utils/models.py +1 -1
  4. utils/video.py +0 -14
app.py CHANGED
@@ -1,19 +1,14 @@
-import os
 from typing import Optional
 
-import cv2
 import gradio as gr
 import numpy as np
 import supervision as sv
 import torch
 from PIL import Image
-from tqdm import tqdm
 from gradio_image_prompter import ImagePrompter
 
 from utils.models import load_models, CHECKPOINT_NAMES, MODE_NAMES, \
-    MASK_GENERATION_MODE, BOX_PROMPT_MODE, VIDEO_SEGMENTATION_MODE
-from utils.video import create_directory, generate_unique_name
-from sam2.build_sam import build_sam2_video_predictor
+    MASK_GENERATION_MODE, BOX_PROMPT_MODE
 
 MARKDOWN = """
 # Segment Anything Model 2 🔥
@@ -36,7 +31,6 @@ Segment Anything Model 2 (SAM 2) is a foundation model designed to address promptable
 visual segmentation in both images and videos. **Video segmentation will be available
 soon.**
 """
-
 EXAMPLES = [
     ["tiny", MASK_GENERATION_MODE, "https://media.roboflow.com/notebooks/examples/dog-2.jpeg", None],
     ["tiny", MASK_GENERATION_MODE, "https://media.roboflow.com/notebooks/examples/dog-3.jpeg", None],
@@ -47,37 +41,8 @@ DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 MASK_ANNOTATOR = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
 IMAGE_PREDICTORS, MASK_GENERATORS = load_models(device=DEVICE)
 
-SCALE_FACTOR = 0.5
-TARGET_DIRECTORY = "tmp"
-# creating video results directory
-create_directory(directory_path=TARGET_DIRECTORY)
-
-
-def on_mode_dropdown_change(text):
-    return [
-        gr.Image(visible=text == MASK_GENERATION_MODE),
-        ImagePrompter(visible=text == BOX_PROMPT_MODE),
-        gr.Video(visible=text == VIDEO_SEGMENTATION_MODE),
-        ImagePrompter(visible=text == VIDEO_SEGMENTATION_MODE),
-        gr.Button(visible=text != VIDEO_SEGMENTATION_MODE),
-        gr.Button(visible=text == VIDEO_SEGMENTATION_MODE),
-        gr.Image(visible=text != VIDEO_SEGMENTATION_MODE),
-        gr.Video(visible=text == VIDEO_SEGMENTATION_MODE)
-    ]
-
-
-def on_video_input_change(video_input):
-    if not video_input:
-        return None
-    frames_generator = sv.get_video_frames_generator(video_input)
-    frame = next(frames_generator)
-    frame = sv.scale_image(frame, SCALE_FACTOR)
-    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    frame = Image.fromarray(frame)
-    return {'image': frame, 'points': []}
-
-
-def process_image(
+
+def process(
     checkpoint_dropdown,
     mode_dropdown,
     image_input,
@@ -114,64 +79,6 @@ def process_image(
     return MASK_ANNOTATOR.annotate(image_input, detections)
 
 
-def process_video(
-    checkpoint_dropdown,
-    mode_dropdown,
-    video_input,
-    video_prompter_input,
-    progress=gr.Progress(track_tqdm=True)
-) -> str:
-    if mode_dropdown != VIDEO_SEGMENTATION_MODE:
-        return str(video_input)
-
-    name = generate_unique_name()
-    frame_directory_path = os.path.join(TARGET_DIRECTORY, name)
-    frames_sink = sv.ImageSink(
-        target_dir_path=frame_directory_path,
-        image_name_pattern="{:05d}.jpeg"
-    )
-
-    video_info = sv.VideoInfo.from_video_path(video_input)
-    frames_generator = sv.get_video_frames_generator(video_input)
-    with frames_sink:
-        for frame in tqdm(
-            frames_generator,
-            total=video_info.total_frames,
-            desc="splitting video into frames"
-        ):
-            frame = sv.scale_image(frame, SCALE_FACTOR)
-            frames_sink.save_image(frame)
-
-    model = build_sam2_video_predictor(
-        "sam2_hiera_t.yaml",
-        "checkpoints/sam2_hiera_tiny.pt",
-        device=DEVICE
-    )
-    inference_state = model.init_state(
-        video_path=frame_directory_path,
-        offload_video_to_cpu=DEVICE == torch.device('cpu'),
-        offload_state_to_cpu=DEVICE == torch.device('cpu'),
-    )
-
-    prompt = video_prompter_input["points"]
-    points = np.array([[x1, y1] for x1, y1, _, _, _, _ in prompt])
-    labels = np.ones(len(points))
-
-    _, object_ids, mask_logits = model.add_new_points(
-        inference_state=inference_state,
-        frame_idx=0,
-        obj_id=1,
-        points=points,
-        labels=labels,
-    )
-
-    del inference_state
-    del model
-
-    video_path = os.path.join(TARGET_DIRECTORY, f"{name}.mp4")
-    return str(video_input)
-
-
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
     with gr.Row():
@@ -187,8 +94,7 @@
             label="Mode",
             info="Select a mode to use. `box prompt` if you want to generate masks for "
                  "selected objects, `mask generation` if you want to generate masks "
-                 "for the whole image, and `video segmentation` if you want to track "
-                 "object on video.",
+                 "for the whole image.",
             interactive=True
         )
     with gr.Row():
@@ -196,22 +102,14 @@
            image_input_component = gr.Image(
                type='pil', label='Upload image', visible=False)
            image_prompter_input_component = ImagePrompter(
-                type='pil', label='Prompt image')
-            video_input_component = gr.Video(
-                label='Step 1: Upload video', visible=False)
-            video_prompter_input_component = ImagePrompter(
-                type='pil', label='Step 2: Prompt frame', visible=False)
-            submit_image_button_component = gr.Button(
+                type='pil', label='Image prompt')
+            submit_button_component = gr.Button(
                value='Submit', variant='primary')
-            submit_video_button_component = gr.Button(
-                value='Submit', variant='primary', visible=False)
        with gr.Column():
-            image_output_component = gr.Image(type='pil', label='Image output')
-            video_output_component = gr.Video(
-                label='Step 2: Video output', visible=False)
+            image_output_component = gr.Image(type='pil', label='Image Output')
    with gr.Row():
        gr.Examples(
-            fn=process_image,
+            fn=process,
            examples=EXAMPLES,
            inputs=[
                checkpoint_dropdown_component,
@@ -223,27 +121,23 @@
        run_on_click=True
    )
 
+
+    def on_mode_dropdown_change(text):
+        return [
+            gr.Image(visible=text == MASK_GENERATION_MODE),
+            ImagePrompter(visible=text == BOX_PROMPT_MODE)
+        ]
+
    mode_dropdown_component.change(
        on_mode_dropdown_change,
        inputs=[mode_dropdown_component],
        outputs=[
            image_input_component,
-            image_prompter_input_component,
-            video_input_component,
-            video_prompter_input_component,
-            submit_image_button_component,
-            submit_video_button_component,
-            image_output_component,
-            video_output_component
+            image_prompter_input_component
        ]
    )
-    video_input_component.change(
-        fn=on_video_input_change,
-        inputs=[video_input_component],
-        outputs=[video_prompter_input_component]
-    )
-    submit_image_button_component.click(
-        fn=process_image,
+    submit_button_component.click(
+        fn=process,
        inputs=[
            checkpoint_dropdown_component,
            mode_dropdown_component,
@@ -252,15 +146,5 @@ with gr.Blocks() as demo:
        ],
        outputs=[image_output_component]
    )
-    submit_video_button_component.click(
-        fn=process_video,
-        inputs=[
-            checkpoint_dropdown_component,
-            mode_dropdown_component,
-            video_input_component,
-            video_prompter_input_component,
-        ],
-        outputs=[video_output_component]
-    )
 
 demo.launch(debug=False, show_error=True, max_threads=1)
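Note on the reverted `process_video`: it split the clip into frames, built the `sam2_hiera_tiny` video predictor, and registered the frame-0 point prompt via `add_new_points`, but then returned the input video unchanged; the mask propagation and rendering step was never written, so the feature was still incomplete when it was reverted. A minimal sketch of that missing step, assuming the reverted function's local names (`model`, `inference_state`, `video_input`, `video_path`, `SCALE_FACTOR`, `MASK_ANNOTATOR`) and SAM 2's `propagate_in_video` generator:

# Hypothetical completion sketch -- not part of this commit. Assumes the
# reverted function's locals plus supervision's VideoSink and mask_to_xyxy.
import supervision as sv

video_info = sv.VideoInfo.from_video_path(video_input)
# frames were scaled by SCALE_FACTOR before init_state, so match that here
video_info.width = int(video_info.width * SCALE_FACTOR)
video_info.height = int(video_info.height * SCALE_FACTOR)

frames_generator = sv.get_video_frames_generator(video_input)
with sv.VideoSink(video_path, video_info=video_info) as sink:
    for (_, object_ids, mask_logits), frame in zip(
        model.propagate_in_video(inference_state), frames_generator
    ):
        frame = sv.scale_image(frame, SCALE_FACTOR)
        # (num_objects, 1, H, W) logits -> (num_objects, H, W) boolean masks
        masks = (mask_logits > 0.0).cpu().numpy()[:, 0, :, :]
        detections = sv.Detections(
            xyxy=sv.mask_to_xyxy(masks=masks),
            mask=masks
        )
        sink.write_frame(MASK_ANNOTATOR.annotate(frame, detections))
# ...and return video_path instead of echoing video_input back.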
requirements.txt CHANGED
@@ -1,4 +1,3 @@
-tqdm
 samv2
 gradio
 supervision
utils/models.py CHANGED
@@ -8,7 +8,7 @@ from sam2.sam2_image_predictor import SAM2ImagePredictor
 BOX_PROMPT_MODE = "box prompt"
 MASK_GENERATION_MODE = "mask generation"
 VIDEO_SEGMENTATION_MODE = "video segmentation"
-MODE_NAMES = [BOX_PROMPT_MODE, MASK_GENERATION_MODE, VIDEO_SEGMENTATION_MODE]
+MODE_NAMES = [BOX_PROMPT_MODE, MASK_GENERATION_MODE]
 
 CHECKPOINT_NAMES = ["tiny", "small", "base_plus", "large"]
 CHECKPOINTS = {
utils/video.py DELETED
@@ -1,14 +0,0 @@
-import os
-import uuid
-import datetime
-
-
-def create_directory(directory_path: str) -> None:
-    if not os.path.exists(directory_path):
-        os.makedirs(directory_path)
-
-
-def generate_unique_name():
-    current_datetime = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
-    unique_id = uuid.uuid4()
-    return f"{current_datetime}_{unique_id}"
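Aside on the deleted helper: `create_directory` duplicates the standard library and its check-then-create pattern is racy. Should `utils/video.py` return with the video feature, a one-line equivalent (using the helper's own parameter name):

import os

# race-free equivalent of the deleted create_directory helper
os.makedirs(directory_path, exist_ok=True)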