yourusername committed on
Commit
059b9d8
·
1 Parent(s): 4014619

:sparkles: updates

Browse files
Files changed (2) hide show
  1. app.py +106 -43
  2. obama.webm +0 -0
app.py CHANGED
@@ -1,26 +1,41 @@
1
- from PIL import Image
2
- import torch
 
 
 
3
  import gradio as gr
4
  import numpy as np
 
5
  from encoded_video import EncodedVideo, write_video
6
- from io import BytesIO
 
7
 
8
- model2 = torch.hub.load(
9
  "AK391/animegan2-pytorch:main",
10
  "generator",
11
  pretrained=True,
12
  device="cuda",
13
  progress=True,
14
- force_reload=True,
15
- )
16
- face2paint = torch.hub.load(
17
- 'AK391/animegan2-pytorch:main', 'face2paint',
18
- size=512, device="cuda",side_by_side=False
19
  )
20
 
21
- def uniform_temporal_subsample(
22
- x: torch.Tensor, num_samples: int, temporal_dim: int = -3
23
- ) -> torch.Tensor:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  """
25
  Uniformly subsamples num_samples indices from the temporal dimension of the video.
26
  When num_samples is larger than the size of temporal dimension of the video, it
@@ -41,51 +56,99 @@ def uniform_temporal_subsample(
41
  return torch.index_select(x, temporal_dim, indices)
42
 
43
 
44
- def inference_video(video_file):
45
- out_fps = 12
46
- start_sec = 0
47
- duration = 2
48
- vid = EncodedVideo.from_path(video_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  clip = vid.get_clip(start_sec, start_sec + duration)
50
- video_arr = clip['video']
 
51
  audio_arr = np.expand_dims(clip['audio'], 0)
52
  audio_fps = None if not vid._has_audio else vid._container.streams.audio[0].sample_rate
53
 
54
- frames = uniform_temporal_subsample(torch.from_numpy(video_arr), duration * out_fps, 0).to(torch.uint8).numpy()
 
 
 
 
 
 
 
 
55
 
56
- out_frames = []
57
- for frame in frames:
58
- im = Image.fromarray(frame)
59
- out = face2paint(model2, im)
60
- out_frames.append(np.array(out))
61
 
62
 
63
- out_frames = np.array(out_frames)
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- bytes_mp4 = bytes()
66
- out_file = BytesIO(bytes_mp4)
67
 
68
- # Add dummy file name to stream, as write_video will be looking for it
69
- out_file.name = "out.mp4"
70
 
71
- write_video(
72
- 'out.mp4',
73
- out_frames,
74
- fps=out_fps,
75
- audio_array=audio_arr,
76
- audio_fps=audio_fps,
77
- audio_codec='aac'
78
- )
79
  return 'out.mp4'
80
 
 
 
 
 
 
 
 
81
  gr.Interface(
82
- inference_video,
83
- inputs=gr.inputs.Video(),
 
 
 
 
 
84
  outputs=gr.outputs.Video(),
85
  title='AnimeGANV2 On Videos',
86
  description="Applying AnimeGAN-V2 to frame from video clips",
87
- article = "<p style='text-align: center'><a href='https://github.com/bryandlee/animegan2-pytorch' target='_blank'>Github Repo Pytorch</a></p><p style='text-align: center'>samples from repo: <img src='https://user-images.githubusercontent.com/26464535/129888683-98bb6283-7bb8-4d1a-a04a-e795f5858dcf.gif' alt='animation'/> <img src='https://user-images.githubusercontent.com/26464535/137619176-59620b59-4e20-4d98-9559-a424f86b7f24.jpg' alt='animation'/><img src='https://user-images.githubusercontent.com/26464535/127134790-93595da2-4f8b-4aca-a9d7-98699c5e6914.jpg' alt='animation'/></p>",
88
  enable_queue=True,
89
- # examples=examples,
90
- allow_flagging=False
91
- ).launch(debug=True)
 
 
 
1
+ import gc
2
+ import math
3
+ import tempfile
4
+ from io import BytesIO
5
+
6
  import gradio as gr
7
  import numpy as np
8
+ import torch
9
  from encoded_video import EncodedVideo, write_video
10
+ from PIL import Image
11
+ from torchvision.transforms.functional import center_crop, to_tensor
12
 
13
+ model = torch.hub.load(
14
  "AK391/animegan2-pytorch:main",
15
  "generator",
16
  pretrained=True,
17
  device="cuda",
18
  progress=True,
 
 
 
 
 
19
  )
20
 
21
+
22
+ def face2paint(model: torch.nn.Module, img: Image.Image, size: int = 512, device: str = 'cuda'):
23
+ w, h = img.size
24
+ s = min(w, h)
25
+ img = img.crop(((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2))
26
+ img = img.resize((size, size), Image.LANCZOS)
27
+
28
+ with torch.no_grad():
29
+ input = to_tensor(img).unsqueeze(0) * 2 - 1
30
+ output = model(input.to(device)).cpu()[0]
31
+
32
+ output = (output * 0.5 + 0.5).clip(0, 1) * 255.0
33
+
34
+ return output
35
+
36
+
37
+ # This function is taken from pytorchvideo!
38
+ def uniform_temporal_subsample(x: torch.Tensor, num_samples: int, temporal_dim: int = -3) -> torch.Tensor:
39
  """
40
  Uniformly subsamples num_samples indices from the temporal dimension of the video.
41
  When num_samples is larger than the size of temporal dimension of the video, it
 
56
  return torch.index_select(x, temporal_dim, indices)
57
 
58
 
59
+ def short_side_scale(
60
+ x: torch.Tensor,
61
+ size: int,
62
+ interpolation: str = "bilinear",
63
+ ) -> torch.Tensor:
64
+ """
65
+ Determines the shorter spatial dim of the video (i.e. width or height) and scales
66
+ it to the given size. To maintain aspect ratio, the longer side is then scaled
67
+ accordingly.
68
+ Args:
69
+ x (torch.Tensor): A video tensor of shape (C, T, H, W) and type torch.float32.
70
+ size (int): The size the shorter side is scaled to.
71
+ interpolation (str): Algorithm used for upsampling,
72
+ options: 'nearest' | 'linear' | 'bilinear' | 'bicubic' | 'trilinear' | 'area'
73
+ Returns:
74
+ An x-like Tensor with scaled spatial dims.
75
+ """
76
+ assert len(x.shape) == 4
77
+ assert x.dtype == torch.float32
78
+ c, t, h, w = x.shape
79
+ if w < h:
80
+ new_h = int(math.floor((float(h) / w) * size))
81
+ new_w = size
82
+ else:
83
+ new_h = size
84
+ new_w = int(math.floor((float(w) / h) * size))
85
+
86
+ return torch.nn.functional.interpolate(x, size=(new_h, new_w), mode=interpolation, align_corners=False)
87
+
88
+
89
+ def inference_step(vid, start_sec, duration, out_fps):
90
+ # vid =
91
  clip = vid.get_clip(start_sec, start_sec + duration)
92
+ # TxHxWxC -> CxTxHxW
93
+ video_arr = torch.from_numpy(clip['video']).permute(3, 0, 1, 2)
94
  audio_arr = np.expand_dims(clip['audio'], 0)
95
  audio_fps = None if not vid._has_audio else vid._container.streams.audio[0].sample_rate
96
 
97
+ x = uniform_temporal_subsample(video_arr, duration * out_fps)
98
+ x = center_crop(short_side_scale(x, 512), 512)
99
+ x /= 255.0
100
+ x = x.permute(1, 0, 2, 3)
101
+ with torch.no_grad():
102
+ output = model(x.to('cuda')).detach().cpu()
103
+ output = (output * 0.5 + 0.5).clip(0, 1) * 255.0
104
+ # TxCx512x512 -> Tx512x512xC
105
+ output_video = output.permute(0, 2, 3, 1).numpy()
106
 
107
+ return output_video, audio_arr, out_fps, audio_fps
 
 
 
 
108
 
109
 
110
+ def predict_fn(filepath, start_sec, duration, out_fps):
111
+ # out_fps=12
112
+ vid = EncodedVideo.from_path(filepath)
113
+ for i in range(duration):
114
+ video, audio, fps, audio_fps = inference_step(vid=vid, start_sec=i + start_sec, duration=1, out_fps=out_fps)
115
+ gc.collect()
116
+ if i == 0:
117
+ video_all = video
118
+ audio_all = audio
119
+ else:
120
+ video_all = np.concatenate((video_all, video))
121
+ audio_all = np.hstack((audio_all, audio))
122
 
123
+ write_video('out.mp4', video_all, fps=fps, audio_array=audio_all, audio_fps=audio_fps, audio_codec='aac')
 
124
 
125
+ del video_all
126
+ del audio_all
127
 
 
 
 
 
 
 
 
 
128
  return 'out.mp4'
129
 
130
+
131
+ article = """
132
+ <p style='text-align: center'>
133
+ <a href='https://github.com/bryandlee/animegan2-pytorch' target='_blank'>Github Repo Pytorch</a>
134
+ </p>
135
+ """
136
+
137
  gr.Interface(
138
+ predict_fn,
139
+ inputs=[
140
+ gr.inputs.Video(),
141
+ gr.inputs.Slider(minimum=0, maximum=300, step=1, default=0),
142
+ gr.inputs.Slider(minimum=1, maximum=10, step=1, default=2),
143
+ gr.inputs.Slider(minimum=12, maximum=30, step=6, default=24),
144
+ ],
145
  outputs=gr.outputs.Video(),
146
  title='AnimeGANV2 On Videos',
147
  description="Applying AnimeGAN-V2 to frame from video clips",
148
+ article=article,
149
  enable_queue=True,
150
+ examples=[
151
+ ['obama.webm', 23, 10, 30],
152
+ ],
153
+ allow_flagging=False,
154
+ ).launch(debug=True)
obama.webm ADDED
Binary file (5.69 MB). View file