Spaces:
Runtime error
Runtime error
Commit
·
cfd00dd
1
Parent(s):
5b1f1d2
Create encoder_psp.py
Browse files- encoder_psp.py +125 -0
encoder_psp.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
This file defines the core research contribution
|
3 |
+
"""
|
4 |
+
import matplotlib
|
5 |
+
matplotlib.use('Agg')
|
6 |
+
import math
|
7 |
+
|
8 |
+
import torch
|
9 |
+
from torch import nn
|
10 |
+
from model.encoder.encoders import psp_encoders
|
11 |
+
from model.stylegan.model import Generator
|
12 |
+
|
13 |
+
def get_keys(d, name):
    """Return the sub-state-dict stored under the ``name`` prefix.

    If ``d`` contains a ``'state_dict'`` entry, that nested mapping is
    searched instead of ``d`` itself. Only keys of the form
    ``"<name>.<rest>"`` are kept, and they are returned as ``"<rest>"``.

    The match requires the ``'.'`` separator, so a sibling prefix such as
    ``"encoder_ema.weight"`` is not picked up (and silently renamed) when
    asking for ``"encoder"``.

    Args:
        d: Mapping of parameter names to values, optionally wrapped in a
            dict under the ``'state_dict'`` key.
        name: Prefix (without the trailing dot) selecting the sub-module.

    Returns:
        A new dict with the prefix and separator stripped from each key.
    """
    if 'state_dict' in d:
        d = d['state_dict']
    prefix = name + '.'
    return {k[len(prefix):]: v for k, v in d.items() if k.startswith(prefix)}
|
18 |
+
|
19 |
+
|
20 |
+
class pSp(nn.Module):
    """Encoder/decoder pair: a pSp encoder feeding a StyleGAN ``Generator``.

    ``opts`` is stored as-is on the instance. The attributes read from it in
    this class are ``output_size``, ``encoder_type``, ``checkpoint_path``,
    ``start_from_latent_avg``, ``learn_in_w`` and ``device``; ``n_styles`` is
    written onto it during construction.
    """

    def __init__(self, opts):
        super(pSp, self).__init__()
        self.set_opts(opts)
        # Compute number of style inputs based on the output resolution.
        # log2 is exact for powers of two, whereas log(x, 2) goes through a
        # floating-point division that may round.
        self.opts.n_styles = int(math.log2(self.opts.output_size)) * 2 - 2
        # Define architecture
        self.encoder = self.set_encoder()
        self.decoder = Generator(self.opts.output_size, 512, 8)
        self.face_pool = torch.nn.AdaptiveAvgPool2d((256, 256))
        # Always defined, so forward() can detect a missing average latent
        # instead of failing with an AttributeError.
        self.latent_avg = None
        # Load weights if needed
        self.load_weights()

    def set_encoder(self):
        """Build the encoder selected by ``opts.encoder_type``.

        Raises:
            ValueError: if ``opts.encoder_type`` is not one of the three
                supported encoder names.
        """
        encoder_type = self.opts.encoder_type
        if encoder_type == 'GradualStyleEncoder':
            encoder = psp_encoders.GradualStyleEncoder(50, 'ir_se', self.opts)
        elif encoder_type == 'BackboneEncoderUsingLastLayerIntoW':
            encoder = psp_encoders.BackboneEncoderUsingLastLayerIntoW(50, 'ir_se', self.opts)
        elif encoder_type == 'BackboneEncoderUsingLastLayerIntoWPlus':
            encoder = psp_encoders.BackboneEncoderUsingLastLayerIntoWPlus(50, 'ir_se', self.opts)
        else:
            # ValueError is still caught by callers handling Exception.
            raise ValueError('{} is not a valid encoders'.format(encoder_type))
        return encoder

    def load_weights(self):
        """Load encoder/decoder weights and the average latent from a checkpoint.

        Does nothing when ``opts.checkpoint_path`` is None: the from-scratch
        initialisation path (ir_se50 encoder weights plus pretrained StyleGAN
        weights) is disabled in this file, so the modules keep their default
        initialisation and ``latent_avg`` stays None.
        """
        if self.opts.checkpoint_path is None:
            return
        print('Loading pSp from checkpoint: {}'.format(self.opts.checkpoint_path))
        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints from a trusted source.
        ckpt = torch.load(self.opts.checkpoint_path, map_location='cpu')
        self.encoder.load_state_dict(get_keys(ckpt, 'encoder'), strict=True)
        self.decoder.load_state_dict(get_keys(ckpt, 'decoder'), strict=True)
        self.__load_latent_avg(ckpt)

    def forward(self, x, resize=True, latent_mask=None, input_code=False, randomize_noise=True,
                inject_latent=None, return_latents=False, alpha=None, z_plus_latent=False,
                return_z_plus_latent=True):
        """Encode ``x`` (or take it as codes) and decode it to images.

        Args:
            x: Encoder input, or the latent codes themselves when
                ``input_code`` is True.
            resize: If True, pool the decoded images to 256x256.
            latent_mask: Optional iterable of indices along dim 1 of the
                codes to overwrite (zeroed, or taken from ``inject_latent``).
            input_code: If True, skip the encoder and use ``x`` as codes.
            randomize_noise: Forwarded to the decoder.
            inject_latent: Optional codes used for the masked indices.
            return_latents: If True, also return latents (see Returns).
            alpha: Optional blend weight; masked entries become
                ``alpha * inject_latent + (1 - alpha) * codes``.
            z_plus_latent: Forwarded to the decoder; also forces
                ``input_is_latent=False``.
            return_z_plus_latent: With ``z_plus_latent`` and
                ``return_latents``, return the codes passed to the decoder
                instead of the decoder's own latent output.

        Returns:
            ``images`` when ``return_latents`` is False; otherwise
            ``(images, codes)`` if both ``z_plus_latent`` and
            ``return_z_plus_latent`` are set, else ``(images, result_latent)``
            as returned by the decoder.

        Raises:
            RuntimeError: if ``opts.start_from_latent_avg`` is set but no
                average latent was loaded.
        """
        if input_code:
            codes = x
        else:
            codes = self.encoder(x)
            # Normalize with respect to the center of an average face.
            # NOTE(review): the offset is applied to encoder output only;
            # confirm against the upstream file, whose indentation was lost.
            if self.opts.start_from_latent_avg:
                if self.latent_avg is None:
                    raise RuntimeError(
                        'start_from_latent_avg is set but no latent_avg was loaded')
                if self.opts.learn_in_w:
                    codes = codes + self.latent_avg.repeat(codes.shape[0], 1)
                else:
                    codes = codes + self.latent_avg.repeat(codes.shape[0], 1, 1)

        if latent_mask is not None:
            # Work on a copy: with input_code=True, `codes` is the caller's
            # tensor and the in-place writes below would corrupt it.
            codes = codes.clone()
            for i in latent_mask:
                if inject_latent is None:
                    codes[:, i] = 0
                elif alpha is None:
                    codes[:, i] = inject_latent[:, i]
                else:
                    codes[:, i] = alpha * inject_latent[:, i] + (1 - alpha) * codes[:, i]

        # Codes supplied by the caller, and z+ codes, are not passed to the
        # decoder as latents.
        input_is_latent = (not input_code) and (not z_plus_latent)
        images, result_latent = self.decoder([codes],
                                             input_is_latent=input_is_latent,
                                             randomize_noise=randomize_noise,
                                             return_latents=return_latents,
                                             z_plus_latent=z_plus_latent)

        if resize:
            images = self.face_pool(images)

        if not return_latents:
            return images
        if z_plus_latent and return_z_plus_latent:
            return images, codes
        return images, result_latent

    def set_opts(self, opts):
        """Store the options object on the instance."""
        self.opts = opts

    def __load_latent_avg(self, ckpt, repeat=None):
        """Set ``self.latent_avg`` from ``ckpt['latent_avg']``, if present.

        The tensor is moved to ``opts.device`` and, when ``repeat`` is given,
        tiled ``repeat`` times along dim 0. If the checkpoint has no
        ``'latent_avg'`` entry, ``self.latent_avg`` is set to None.
        """
        if 'latent_avg' in ckpt:
            self.latent_avg = ckpt['latent_avg'].to(self.opts.device)
            if repeat is not None:
                self.latent_avg = self.latent_avg.repeat(repeat, 1)
        else:
            self.latent_avg = None
|