yourusername committed
Commit a29ba80 · 1 Parent(s): 355acfb

:art: cleanup

Files changed (1)
  1. app.py +9 -7
app.py CHANGED
@@ -56,6 +56,7 @@ def uniform_temporal_subsample(x: torch.Tensor, num_samples: int, temporal_dim:
     return torch.index_select(x, temporal_dim, indices)
 
 
+# This function is taken from pytorchvideo!
 def short_side_scale(
     x: torch.Tensor,
     size: int,
@@ -87,9 +88,8 @@ def short_side_scale(
 
 
 def inference_step(vid, start_sec, duration, out_fps):
-    # vid =
+
     clip = vid.get_clip(start_sec, start_sec + duration)
-    # TxCxHxW -> CxTxHxW
     video_arr = torch.from_numpy(clip['video']).permute(3, 0, 1, 2)
     audio_arr = np.expand_dims(clip['audio'], 0)
     audio_fps = None if not vid._has_audio else vid._container.streams.audio[0].sample_rate
@@ -101,7 +101,6 @@ def inference_step(vid, start_sec, duration, out_fps):
     with torch.no_grad():
         output = model(x.to('cuda')).detach().cpu()
         output = (output * 0.5 + 0.5).clip(0, 1) * 255.0
-        # CxTx512x512 -> TxCx512x512
         output_video = output.permute(0, 2, 3, 1).numpy()
 
     return output_video, audio_arr, out_fps, audio_fps
@@ -111,6 +110,7 @@ def predict_fn(filepath, start_sec, duration, out_fps):
     # out_fps=12
     vid = EncodedVideo.from_path(filepath)
     for i in range(duration):
+        print(f"🖼️ Processing step {i + 1}/{duration}...")
         video, audio, fps, audio_fps = inference_step(vid=vid, start_sec=i + start_sec, duration=1, out_fps=out_fps)
         gc.collect()
         if i == 0:
@@ -120,8 +120,10 @@ def predict_fn(filepath, start_sec, duration, out_fps):
         video_all = np.concatenate((video_all, video))
         audio_all = np.hstack((audio_all, audio))
 
+    print(f"💾 Writing output video...")
     write_video('out.mp4', video_all, fps=fps, audio_array=audio_all, audio_fps=audio_fps, audio_codec='aac')
 
+    print(f"✅ Done!")
     del video_all
     del audio_all
 
@@ -139,16 +141,16 @@ gr.Interface(
     inputs=[
         gr.inputs.Video(),
         gr.inputs.Slider(minimum=0, maximum=300, step=1, default=0),
-        gr.inputs.Slider(minimum=1, maximum=10, step=1, default=2),
-        gr.inputs.Slider(minimum=12, maximum=30, step=6, default=24),
+        gr.inputs.Slider(minimum=1, maximum=5, step=1, default=2),
+        gr.inputs.Slider(minimum=6, maximum=24, step=6, default=12),
     ],
     outputs=gr.outputs.Video(),
     title='AnimeGANV2 On Videos',
-    description="Applying AnimeGAN-V2 to frame from video clips",
+    description="Applying AnimeGAN-V2 to frames from video clips",
     article=article,
     enable_queue=True,
     examples=[
-        ['obama.webm', 23, 10, 30],
+        ['obama.webm', 23, 5, 12],
     ],
     allow_flagging=False,
 ).launch(debug=True)
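
The comment added at new line 59 credits short_side_scale to pytorchvideo. Its body is outside this diff, so the snippet below is only a sketch of what such a short-side scale typically does, mirroring pytorchvideo.transforms.functional.short_side_scale: rescale a CxTxHxW clip so its shorter spatial side equals size.

import torch
import torch.nn.functional as F

def short_side_scale(x: torch.Tensor, size: int) -> torch.Tensor:
    # Sketch only: the body in app.py is not shown in this diff. Assumes a
    # float CxTxHxW tensor, as prepared earlier in inference_step.
    c, t, h, w = x.shape
    if h < w:
        new_h, new_w = size, int(round(w * size / h))
    else:
        new_h, new_w = int(round(h * size / w)), size
    return F.interpolate(x, size=(new_h, new_w), mode="bilinear", align_corners=False)

frames = torch.rand(3, 8, 360, 640)          # C x T x H x W
print(short_side_scale(frames, 512).shape)   # torch.Size([3, 8, 512, 910])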
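
The predict_fn hunks add progress prints around a per-second loop: each one-second chunk returned by inference_step is appended to running frame and audio arrays, and the result is written once with torchvision's write_video, muxing the audio as AAC. Below is a minimal, self-contained sketch of that accumulate-and-write pattern; dummy_chunk is a hypothetical stand-in for inference_step so the sketch runs without the model.

import numpy as np
from torchvision.io import write_video

def dummy_chunk(fps=12, sr=16000):
    # Hypothetical stand-in for inference_step: one second of random frames
    # (T x H x W x C, uint8) plus a matching mono audio track (1 x N, float).
    video = np.random.randint(0, 255, size=(fps, 64, 64, 3), dtype=np.uint8)
    audio = np.random.uniform(-1, 1, size=(1, sr)).astype(np.float32)
    return video, audio

duration, fps, sr = 3, 12, 16000
for i in range(duration):
    print(f"🖼️ Processing step {i + 1}/{duration}...")
    video, audio = dummy_chunk(fps, sr)
    if i == 0:
        video_all, audio_all = video, audio
    else:
        video_all = np.concatenate((video_all, video))  # extend frames along time
        audio_all = np.hstack((audio_all, audio))       # extend the audio track

print("💾 Writing output video...")
write_video('out.mp4', video_all, fps=fps,
            audio_array=audio_all, audio_fps=sr, audio_codec='aac')
print("✅ Done!")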