WindVChen committed
Commit e200a3f • 1 Parent(s): 2cfb929

Upload 23 files

Browse files
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 👋🏃‍♂️
 colorFrom: purple
 colorTo: pink
 sdk: gradio
-sdk_version: 3.24.0
+sdk_version: 3.26.0
 app_file: app.py
 python_version: 3.8.11
 pinned: false
app.py CHANGED
@@ -6,7 +6,6 @@ import gradio as gr
 import numpy as np
 import sys
 import io
-import torch
 
 
 class Logger:
@@ -38,7 +37,7 @@ def read_logs():
 return out
 
 
-with gr.Blocks() as app:
+with gr.Blocks(css=".output-image, .input-image, .image-preview {height: 600px !important}") as app:
 gr.Markdown("""
 # HINet (or INR-Harmonization) - A novel image Harmonization method based on Implicit neural Networks
 ## Harmonize any image you want! Arbitrary resolution, and arbitrary aspect ratio!
@@ -49,6 +48,16 @@ with gr.Blocks() as app:
 * Official Repo: [INR-Harmonization](https://github.com/WindVChen/INR-Harmonization)
 """)
 
+gr.Markdown("""
+## Quick Start
+1. Select the desired `Pretrained Model`.
+2. Select a composite image, and then a mask of the same size.
+3. Select the inference mode (for non-square images, only `Arbitrary Image` is supported). Also note that `Square Image` mode will be much faster than `Arbitrary Image` mode.
+4. Set `Split Resolution` (the patches' resolution) or `Split Number` (how many patches, about N*N) according to the inference mode.
+5. Click `Start` and enjoy it!
+
+""")
+
 valid_checkpoints_dict = {"Resolution_256_iHarmony4": "Resolution_256_iHarmony4.pth",
 "Resolution_1024_HAdobe5K": "Resolution_1024_HAdobe5K.pth",
 "Resolution_2048_HAdobe5K": "Resolution_2048_HAdobe5K.pth",
@@ -61,13 +70,12 @@ with gr.Blocks() as app:
 })
 with gr.Row():
 with gr.Column():
-form_composite_image = gr.Image(label='Input Composite image', type='pil').style(height="auto")
-gr.Examples(examples=[os.path.join("demo", i) for i in os.listdir("demo") if "composite" in i],
+form_composite_image = gr.Image(label='Input Composite image', type='pil').style(height=512)
+gr.Examples(examples=sorted([os.path.join("demo", i) for i in os.listdir("demo") if "composite" in i]),
 label="Composite Examples", inputs=form_composite_image, cache_examples=False)
 with gr.Column():
-form_mask_image = gr.Image(label='Input Mask image', type='pil', interactive=False).style(
-height="auto")
-gr.Examples(examples=[os.path.join("demo", i) for i in os.listdir("demo") if "mask" in i],
+form_mask_image = gr.Image(label='Input Mask image', type='pil', interactive=False).style(height=512)
+gr.Examples(examples=sorted([os.path.join("demo", i) for i in os.listdir("demo") if "mask" in i]),
 label="Mask Examples", inputs=form_mask_image, cache_examples=False)
 with gr.Row():
 with gr.Column(scale=4):
@@ -109,15 +117,14 @@ with gr.Blocks() as app:
 label="Split Resolution",
 )
 form_split_num = gr.Number(
-value=8,
+value=2,
 interactive=False,
 label="Split Number")
 with gr.Row():
 form_log = gr.Textbox(read_logs, label="Logs", interactive=False, type="text", every=1)
 
 with gr.Column(scale=4):
-form_harmonized_image = gr.Image(label='Harmonized Result', type='numpy', interactive=False).style(
-height="auto")
+form_harmonized_image = gr.Image(label='Harmonized Result', type='numpy', interactive=False).style(height=512)
 form_start_btn = gr.Button("Start Harmonization", interactive=False)
 form_reset_btn = gr.Button("Reset", interactive=True)
 form_stop_btn = gr.Button("Stop", interactive=True)
@@ -126,7 +133,7 @@ with gr.Blocks() as app:
 def on_change_form_composite_image(form_composite_image):
 if form_composite_image is None:
 return gr.update(interactive=False, value=None), gr.update(value=None)
-return gr.update(interactive=True), gr.update(value=None)
+return gr.update(interactive=True, value=None), gr.update(value=None)
 
 
 def on_change_form_mask_image(form_composite_image, form_mask_image):
@@ -141,15 +148,15 @@ with gr.Blocks() as app:
 w, h = form_composite_image.size[:2]
 if h != w or (h % 16 != 0):
 return gr.update(value='Arbitrary Image', interactive=False), gr.update(interactive=True), gr.update(
-interactive=True), gr.update(interactive=True), gr.update(interactive=False,
-value=-1), gr.update(value=None)
+interactive=True), gr.update(interactive=True, visible=True), gr.update(interactive=False,
+value=-1, visible=False), gr.update(value=None)
 else:
 return gr.update(value='Square Image', interactive=True), gr.update(interactive=True), gr.update(
-interactive=True), gr.update(interactive=False), gr.update(interactive=True,
-value=h // 16,
+interactive=True), gr.update(interactive=False, visible=False), gr.update(interactive=True,
+value=h // 2,
 maximum=h,
 minimum=h // 16,
-step=h // 16), gr.update(value=None)
+step=h // 16, visible=True), gr.update(value=None)
 
 
 form_composite_image.change(
@@ -185,9 +192,9 @@ with gr.Blocks() as app:
 
 def on_change_form_inference_mode(form_inference_mode):
 if form_inference_mode == "Square Image":
-return gr.update(interactive=True), gr.update(interactive=False)
+return gr.update(interactive=True, visible=True), gr.update(interactive=False, visible=False)
 else:
-return gr.update(interactive=False), gr.update(interactive=True)
+return gr.update(interactive=False, visible=False), gr.update(interactive=True, visible=True)
 
 
 form_inference_mode.change(on_change_form_inference_mode, inputs=[form_inference_mode],
@@ -197,6 +204,7 @@ with gr.Blocks() as app:
 def on_click_form_start_btn(form_composite_image, form_mask_image, form_pretrained_dropdown, form_inference_mode,
 form_split_res, form_split_num):
 log.log = io.BytesIO()
+print(f"Harmonizing image with {form_composite_image.size[1]}*{form_composite_image.size[0]}...")
 if form_inference_mode == "Square Image":
 from efficient_inference_for_square_image import parse_args, main_process, global_state
 global_state[0] = 1
@@ -287,15 +295,6 @@ with gr.Blocks() as app:
 inputs=[form_inference_mode],
 outputs=[form_log, form_composite_image, form_mask_image, form_start_btn], cancels=generate)
 
-gr.Markdown("""
-## Quick Start
-1. Select desired `Pretrained Model`.
-2. Select a composite image, and then a mask with the same size.
-3. Select the inference mode (for non-square image, only `Arbitrary Image` support).
-4. Set `Split Resolution` (Patches' resolution) or `Split Number` (How many patches, about N*N) according to the inference mode.
-3. Click `Start` and enjoy it!
-
-""")
 gr.HTML("""
 <style>
 .container {
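The new `form_log` textbox polls `read_logs` every second (`every=1`) and shows whatever the `Logger` instance has captured since `log.log = io.BytesIO()` reset it at the start of a run. The `Logger` body is not part of this diff; below is a minimal sketch, assuming it simply tees `sys.stdout` into that in-memory buffer:

```python
# Hedged sketch of the stdout-capture pattern implied by the diff; the real Logger in app.py
# is not shown here, so names and details beyond log.log / read_logs are assumptions.
import io
import sys

class Logger:
    def __init__(self):
        self.terminal = sys.stdout   # keep the real stdout so messages still reach the console
        self.log = io.BytesIO()      # in-memory buffer that the UI polls

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message.encode())

    def flush(self):
        self.terminal.flush()

log = Logger()
sys.stdout = log                     # every print() now also lands in log.log

def read_logs():
    sys.stdout.flush()
    return log.log.getvalue().decode()   # value rendered by gr.Textbox(read_logs, every=1)
```

Resetting `log.log` to a fresh `io.BytesIO()` inside `on_click_form_start_btn` then clears the panel at the start of each harmonization run.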
demo/demo_1k_composite_2.jpg ADDED
demo/demo_1k_composite_3.jpg ADDED
demo/demo_1k_mask_2.jpg ADDED
demo/demo_1k_mask_3.jpg ADDED
demo/demo_composite.jpg ADDED
demo/demo_composite_1.jpg ADDED
demo/demo_composite_2.jpg ADDED
demo/demo_composite_3.jpg ADDED
demo/demo_composite_4.jpg ADDED
demo/demo_composite_5.jpg ADDED
demo/demo_composite_6.jpg ADDED
demo/demo_mask.png ADDED
demo/demo_mask_1.png ADDED
demo/demo_mask_2.png ADDED
demo/demo_mask_3.png ADDED
demo/demo_mask_4.jpg ADDED
demo/demo_mask_5.jpg ADDED
demo/demo_mask_6.jpg ADDED
efficient_inference_for_square_image.py CHANGED
@@ -284,6 +284,7 @@ def inference(model, opt, composite_image=None, mask=None):
 mask,
 fg_INR_coordinates, start_proportion[0]
 )
+print("Ready for harmonization...")
 if opt.device == "cuda":
 torch.cuda.reset_max_memory_allocated()
 torch.cuda.reset_max_memory_cached()
@@ -333,12 +334,11 @@ def inference(model, opt, composite_image=None, mask=None):
 def main_process(opt, composite_image=None, mask=None):
 cudnn.benchmark = True
 
+print("Preparing model...")
 model = build_model(opt).to(opt.device)
 
 load_dict = torch.load(opt.pretrained, map_location='cpu')['model']
-for k in load_dict.keys():
-if k not in model.state_dict().keys():
-print(f"Skip {k}")
+
 model.load_state_dict(load_dict, strict=False)
 
 return inference(model, opt, composite_image, mask)
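With the `Skip {k}` loop removed, key mismatches are no longer reported during loading. If that diagnostic is still wanted, `load_state_dict(..., strict=False)` already returns the incompatible keys; a short sketch (not part of this commit) of how the same report could be recovered:

```python
# Hedged sketch, not in the commit: with strict=False, load_state_dict returns a named tuple
# of missing_keys / unexpected_keys, which covers what the removed loop printed.
incompatible = model.load_state_dict(load_dict, strict=False)
for k in incompatible.unexpected_keys:   # checkpoint entries absent from the model
    print(f"Skip {k}")
for k in incompatible.missing_keys:      # model parameters absent from the checkpoint
    print(f"Not initialized from checkpoint: {k}")
```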
hrnet_ocr.py ADDED
@@ -0,0 +1,400 @@
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ import torch._utils
7
+
8
+ from .ocr import SpatialOCR_Module, SpatialGather_Module
9
+ from .resnetv1b import BasicBlockV1b, BottleneckV1b
10
+
11
+ relu_inplace = True
12
+
13
+
14
+ class HighResolutionModule(nn.Module):
15
+ def __init__(self, num_branches, blocks, num_blocks, num_inchannels,
16
+ num_channels, fuse_method,multi_scale_output=True,
17
+ norm_layer=nn.BatchNorm2d, align_corners=True):
18
+ super(HighResolutionModule, self).__init__()
19
+ self._check_branches(num_branches, num_blocks, num_inchannels, num_channels)
20
+
21
+ self.num_inchannels = num_inchannels
22
+ self.fuse_method = fuse_method
23
+ self.num_branches = num_branches
24
+ self.norm_layer = norm_layer
25
+ self.align_corners = align_corners
26
+
27
+ self.multi_scale_output = multi_scale_output
28
+
29
+ self.branches = self._make_branches(
30
+ num_branches, blocks, num_blocks, num_channels)
31
+ self.fuse_layers = self._make_fuse_layers()
32
+ self.relu = nn.ReLU(inplace=relu_inplace)
33
+
34
+ def _check_branches(self, num_branches, num_blocks, num_inchannels, num_channels):
35
+ if num_branches != len(num_blocks):
36
+ error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format(
37
+ num_branches, len(num_blocks))
38
+ raise ValueError(error_msg)
39
+
40
+ if num_branches != len(num_channels):
41
+ error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format(
42
+ num_branches, len(num_channels))
43
+ raise ValueError(error_msg)
44
+
45
+ if num_branches != len(num_inchannels):
46
+ error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(
47
+ num_branches, len(num_inchannels))
48
+ raise ValueError(error_msg)
49
+
50
+ def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
51
+ stride=1):
52
+ downsample = None
53
+ if stride != 1 or \
54
+ self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion:
55
+ downsample = nn.Sequential(
56
+ nn.Conv2d(self.num_inchannels[branch_index],
57
+ num_channels[branch_index] * block.expansion,
58
+ kernel_size=1, stride=stride, bias=False),
59
+ self.norm_layer(num_channels[branch_index] * block.expansion),
60
+ )
61
+
62
+ layers = []
63
+ layers.append(block(self.num_inchannels[branch_index],
64
+ num_channels[branch_index], stride,
65
+ downsample=downsample, norm_layer=self.norm_layer))
66
+ self.num_inchannels[branch_index] = \
67
+ num_channels[branch_index] * block.expansion
68
+ for i in range(1, num_blocks[branch_index]):
69
+ layers.append(block(self.num_inchannels[branch_index],
70
+ num_channels[branch_index],
71
+ norm_layer=self.norm_layer))
72
+
73
+ return nn.Sequential(*layers)
74
+
75
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
76
+ branches = []
77
+
78
+ for i in range(num_branches):
79
+ branches.append(
80
+ self._make_one_branch(i, block, num_blocks, num_channels))
81
+
82
+ return nn.ModuleList(branches)
83
+
84
+ def _make_fuse_layers(self):
85
+ if self.num_branches == 1:
86
+ return None
87
+
88
+ num_branches = self.num_branches
89
+ num_inchannels = self.num_inchannels
90
+ fuse_layers = []
91
+ for i in range(num_branches if self.multi_scale_output else 1):
92
+ fuse_layer = []
93
+ for j in range(num_branches):
94
+ if j > i:
95
+ fuse_layer.append(nn.Sequential(
96
+ nn.Conv2d(in_channels=num_inchannels[j],
97
+ out_channels=num_inchannels[i],
98
+ kernel_size=1,
99
+ bias=False),
100
+ self.norm_layer(num_inchannels[i])))
101
+ elif j == i:
102
+ fuse_layer.append(None)
103
+ else:
104
+ conv3x3s = []
105
+ for k in range(i - j):
106
+ if k == i - j - 1:
107
+ num_outchannels_conv3x3 = num_inchannels[i]
108
+ conv3x3s.append(nn.Sequential(
109
+ nn.Conv2d(num_inchannels[j],
110
+ num_outchannels_conv3x3,
111
+ kernel_size=3, stride=2, padding=1, bias=False),
112
+ self.norm_layer(num_outchannels_conv3x3)))
113
+ else:
114
+ num_outchannels_conv3x3 = num_inchannels[j]
115
+ conv3x3s.append(nn.Sequential(
116
+ nn.Conv2d(num_inchannels[j],
117
+ num_outchannels_conv3x3,
118
+ kernel_size=3, stride=2, padding=1, bias=False),
119
+ self.norm_layer(num_outchannels_conv3x3),
120
+ nn.ReLU(inplace=relu_inplace)))
121
+ fuse_layer.append(nn.Sequential(*conv3x3s))
122
+ fuse_layers.append(nn.ModuleList(fuse_layer))
123
+
124
+ return nn.ModuleList(fuse_layers)
125
+
126
+ def get_num_inchannels(self):
127
+ return self.num_inchannels
128
+
129
+ def forward(self, x):
130
+ if self.num_branches == 1:
131
+ return [self.branches[0](x[0])]
132
+
133
+ for i in range(self.num_branches):
134
+ x[i] = self.branches[i](x[i])
135
+
136
+ x_fuse = []
137
+ for i in range(len(self.fuse_layers)):
138
+ y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
139
+ for j in range(1, self.num_branches):
140
+ if i == j:
141
+ y = y + x[j]
142
+ elif j > i:
143
+ width_output = x[i].shape[-1]
144
+ height_output = x[i].shape[-2]
145
+ y = y + F.interpolate(
146
+ self.fuse_layers[i][j](x[j]),
147
+ size=[height_output, width_output],
148
+ mode='bilinear', align_corners=self.align_corners)
149
+ else:
150
+ y = y + self.fuse_layers[i][j](x[j])
151
+ x_fuse.append(self.relu(y))
152
+
153
+ return x_fuse
154
+
155
+
156
+ class HighResolutionNet(nn.Module):
157
+ def __init__(self, width, num_classes, ocr_width=256, small=False,
158
+ norm_layer=nn.BatchNorm2d, align_corners=True, opt=None):
159
+ super(HighResolutionNet, self).__init__()
160
+ self.opt = opt
161
+ self.norm_layer = norm_layer
162
+ self.width = width
163
+ self.ocr_width = ocr_width
164
+ self.ocr_on = ocr_width > 0
165
+ self.align_corners = align_corners
166
+
167
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
168
+ self.bn1 = norm_layer(64)
169
+ self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
170
+ self.bn2 = norm_layer(64)
171
+ self.relu = nn.ReLU(inplace=relu_inplace)
172
+
173
+ num_blocks = 2 if small else 4
174
+
175
+ stage1_num_channels = 64
176
+ self.layer1 = self._make_layer(BottleneckV1b, 64, stage1_num_channels, blocks=num_blocks)
177
+ stage1_out_channel = BottleneckV1b.expansion * stage1_num_channels
178
+
179
+ self.stage2_num_branches = 2
180
+ num_channels = [width, 2 * width]
181
+ num_inchannels = [
182
+ num_channels[i] * BasicBlockV1b.expansion for i in range(len(num_channels))]
183
+ self.transition1 = self._make_transition_layer(
184
+ [stage1_out_channel], num_inchannels)
185
+ self.stage2, pre_stage_channels = self._make_stage(
186
+ BasicBlockV1b, num_inchannels=num_inchannels, num_modules=1, num_branches=self.stage2_num_branches,
187
+ num_blocks=2 * [num_blocks], num_channels=num_channels)
188
+
189
+ self.stage3_num_branches = 3
190
+ num_channels = [width, 2 * width, 4 * width]
191
+ num_inchannels = [
192
+ num_channels[i] * BasicBlockV1b.expansion for i in range(len(num_channels))]
193
+ self.transition2 = self._make_transition_layer(
194
+ pre_stage_channels, num_inchannels)
195
+ self.stage3, pre_stage_channels = self._make_stage(
196
+ BasicBlockV1b, num_inchannels=num_inchannels,
197
+ num_modules=3 if small else 4, num_branches=self.stage3_num_branches,
198
+ num_blocks=3 * [num_blocks], num_channels=num_channels)
199
+
200
+ self.stage4_num_branches = 4
201
+ num_channels = [width, 2 * width, 4 * width, 8 * width]
202
+ num_inchannels = [
203
+ num_channels[i] * BasicBlockV1b.expansion for i in range(len(num_channels))]
204
+ self.transition3 = self._make_transition_layer(
205
+ pre_stage_channels, num_inchannels)
206
+ self.stage4, pre_stage_channels = self._make_stage(
207
+ BasicBlockV1b, num_inchannels=num_inchannels, num_modules=2 if small else 3,
208
+ num_branches=self.stage4_num_branches,
209
+ num_blocks=4 * [num_blocks], num_channels=num_channels)
210
+
211
+ if self.ocr_on:
212
+ last_inp_channels = int(np.sum(pre_stage_channels))
213
+ ocr_mid_channels = 2 * ocr_width
214
+ ocr_key_channels = ocr_width
215
+
216
+ self.conv3x3_ocr = nn.Sequential(
217
+ nn.Conv2d(last_inp_channels, ocr_mid_channels,
218
+ kernel_size=3, stride=1, padding=1),
219
+ norm_layer(ocr_mid_channels),
220
+ nn.ReLU(inplace=relu_inplace),
221
+ )
222
+ self.ocr_gather_head = SpatialGather_Module(num_classes)
223
+
224
+ self.ocr_distri_head = SpatialOCR_Module(in_channels=ocr_mid_channels,
225
+ key_channels=ocr_key_channels,
226
+ out_channels=ocr_mid_channels,
227
+ scale=1,
228
+ dropout=0.05,
229
+ norm_layer=norm_layer,
230
+ align_corners=align_corners, opt=opt)
231
+
232
+ def _make_transition_layer(
233
+ self, num_channels_pre_layer, num_channels_cur_layer):
234
+ num_branches_cur = len(num_channels_cur_layer)
235
+ num_branches_pre = len(num_channels_pre_layer)
236
+
237
+ transition_layers = []
238
+ for i in range(num_branches_cur):
239
+ if i < num_branches_pre:
240
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
241
+ transition_layers.append(nn.Sequential(
242
+ nn.Conv2d(num_channels_pre_layer[i],
243
+ num_channels_cur_layer[i],
244
+ kernel_size=3,
245
+ stride=1,
246
+ padding=1,
247
+ bias=False),
248
+ self.norm_layer(num_channels_cur_layer[i]),
249
+ nn.ReLU(inplace=relu_inplace)))
250
+ else:
251
+ transition_layers.append(None)
252
+ else:
253
+ conv3x3s = []
254
+ for j in range(i + 1 - num_branches_pre):
255
+ inchannels = num_channels_pre_layer[-1]
256
+ outchannels = num_channels_cur_layer[i] \
257
+ if j == i - num_branches_pre else inchannels
258
+ conv3x3s.append(nn.Sequential(
259
+ nn.Conv2d(inchannels, outchannels,
260
+ kernel_size=3, stride=2, padding=1, bias=False),
261
+ self.norm_layer(outchannels),
262
+ nn.ReLU(inplace=relu_inplace)))
263
+ transition_layers.append(nn.Sequential(*conv3x3s))
264
+
265
+ return nn.ModuleList(transition_layers)
266
+
267
+ def _make_layer(self, block, inplanes, planes, blocks, stride=1):
268
+ downsample = None
269
+ if stride != 1 or inplanes != planes * block.expansion:
270
+ downsample = nn.Sequential(
271
+ nn.Conv2d(inplanes, planes * block.expansion,
272
+ kernel_size=1, stride=stride, bias=False),
273
+ self.norm_layer(planes * block.expansion),
274
+ )
275
+
276
+ layers = []
277
+ layers.append(block(inplanes, planes, stride,
278
+ downsample=downsample, norm_layer=self.norm_layer))
279
+ inplanes = planes * block.expansion
280
+ for i in range(1, blocks):
281
+ layers.append(block(inplanes, planes, norm_layer=self.norm_layer))
282
+
283
+ return nn.Sequential(*layers)
284
+
285
+ def _make_stage(self, block, num_inchannels,
286
+ num_modules, num_branches, num_blocks, num_channels,
287
+ fuse_method='SUM',
288
+ multi_scale_output=True):
289
+ modules = []
290
+ for i in range(num_modules):
291
+ # multi_scale_output is only used last module
292
+ if not multi_scale_output and i == num_modules - 1:
293
+ reset_multi_scale_output = False
294
+ else:
295
+ reset_multi_scale_output = True
296
+ modules.append(
297
+ HighResolutionModule(num_branches,
298
+ block,
299
+ num_blocks,
300
+ num_inchannels,
301
+ num_channels,
302
+ fuse_method,
303
+ reset_multi_scale_output,
304
+ norm_layer=self.norm_layer,
305
+ align_corners=self.align_corners)
306
+ )
307
+ num_inchannels = modules[-1].get_num_inchannels()
308
+
309
+ return nn.Sequential(*modules), num_inchannels
310
+
311
+ def forward(self, x, mask=None, additional_features=None):
312
+ hrnet_feats = self.compute_hrnet_feats(x, additional_features)
313
+ if not self.ocr_on:
314
+ return hrnet_feats,
315
+
316
+ ocr_feats = self.conv3x3_ocr(hrnet_feats)
317
+ mask = nn.functional.interpolate(mask, size=ocr_feats.size()[2:], mode='bilinear', align_corners=True)
318
+ context = self.ocr_gather_head(ocr_feats, mask)
319
+ ocr_feats = self.ocr_distri_head(ocr_feats, context)
320
+ return ocr_feats,
321
+
322
+ def compute_hrnet_feats(self, x, additional_features, return_list=False):
323
+ x = self.compute_pre_stage_features(x, additional_features)
324
+ x = self.layer1(x)
325
+
326
+ x_list = []
327
+ for i in range(self.stage2_num_branches):
328
+ if self.transition1[i] is not None:
329
+ x_list.append(self.transition1[i](x))
330
+ else:
331
+ x_list.append(x)
332
+ y_list = self.stage2(x_list)
333
+
334
+ x_list = []
335
+ for i in range(self.stage3_num_branches):
336
+ if self.transition2[i] is not None:
337
+ if i < self.stage2_num_branches:
338
+ x_list.append(self.transition2[i](y_list[i]))
339
+ else:
340
+ x_list.append(self.transition2[i](y_list[-1]))
341
+ else:
342
+ x_list.append(y_list[i])
343
+ y_list = self.stage3(x_list)
344
+
345
+ x_list = []
346
+ for i in range(self.stage4_num_branches):
347
+ if self.transition3[i] is not None:
348
+ if i < self.stage3_num_branches:
349
+ x_list.append(self.transition3[i](y_list[i]))
350
+ else:
351
+ x_list.append(self.transition3[i](y_list[-1]))
352
+ else:
353
+ x_list.append(y_list[i])
354
+ x = self.stage4(x_list)
355
+
356
+ if return_list:
357
+ return x
358
+
359
+ # Upsampling
360
+ x0_h, x0_w = x[0].size(2), x[0].size(3)
361
+ x1 = F.interpolate(x[1], size=(x0_h, x0_w),
362
+ mode='bilinear', align_corners=self.align_corners)
363
+ x2 = F.interpolate(x[2], size=(x0_h, x0_w),
364
+ mode='bilinear', align_corners=self.align_corners)
365
+ x3 = F.interpolate(x[3], size=(x0_h, x0_w),
366
+ mode='bilinear', align_corners=self.align_corners)
367
+
368
+ return torch.cat([x[0], x1, x2, x3], 1)
369
+
370
+ def compute_pre_stage_features(self, x, additional_features):
371
+ x = self.conv1(x)
372
+ x = self.bn1(x)
373
+ x = self.relu(x)
374
+ if additional_features is not None:
375
+ x = x + additional_features
376
+ x = self.conv2(x)
377
+ x = self.bn2(x)
378
+ return self.relu(x)
379
+
380
+ def load_pretrained_weights(self, pretrained_path=''):
381
+ model_dict = self.state_dict()
382
+
383
+ if not os.path.exists(pretrained_path):
384
+ print(f'\nFile "{pretrained_path}" does not exist.')
385
+ print('You need to specify the correct path to the pre-trained weights.\n'
386
+ 'You can download the weights for HRNet from the repository:\n'
387
+ 'https://github.com/HRNet/HRNet-Image-Classification')
388
+ exit(1)
389
+ pretrained_dict = torch.load(pretrained_path, map_location={'cuda:0': 'cpu'})
390
+ pretrained_dict = {k.replace('last_layer', 'aux_head').replace('model.', ''): v for k, v in
391
+ pretrained_dict.items()}
392
+ params_count = len(pretrained_dict)
393
+
394
+ pretrained_dict = {k: v for k, v in pretrained_dict.items()
395
+ if k in model_dict.keys()}
396
+
397
+ # print(f'Loaded {len(pretrained_dict)} of {params_count} pretrained parameters for HRNet')
398
+
399
+ model_dict.update(pretrained_dict)
400
+ self.load_state_dict(model_dict)
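`hrnet_ocr.py` adds an HRNet backbone with an optional OCR head: `ocr_width > 0` enables the head, in which case `forward` needs a `mask` for `SpatialGather_Module`, while `ocr_width=0` skips it and returns the concatenated multi-scale HRNet features. A hypothetical smoke test follows (not in the commit; `width=18` and the input size are illustrative, and the relative `.ocr` / `.resnetv1b` imports must resolve in your package layout for the import to work):

```python
# Hypothetical usage sketch; module path, width, and input size are assumptions, not repo config values.
import torch
from hrnet_ocr import HighResolutionNet  # works only if .ocr / .resnetv1b are importable siblings

net = HighResolutionNet(width=18, num_classes=1, ocr_width=0, small=True)  # ocr_width=0: no mask needed
net.eval()

x = torch.randn(1, 3, 256, 256)   # dummy RGB composite
with torch.no_grad():
    feats, = net(x)               # forward returns a one-element tuple of features
print(feats.shape)                # multi-scale features at 1/4 of the input resolution
```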
inference_for_arbitrary_resolution_image.py CHANGED
@@ -276,6 +276,8 @@ def inference(model, opt, composite_image=None, mask=None):
 mask,
 fg_INR_coordinates,
 )
+print("Ready for harmonization...")
+
 if opt.device == "cuda":
 torch.cuda.reset_max_memory_allocated()
 torch.cuda.reset_max_memory_cached()
@@ -325,12 +327,11 @@ def inference(model, opt, composite_image=None, mask=None):
 def main_process(opt, composite_image=None, mask=None):
 cudnn.benchmark = True
 
+print("Preparing model...")
 model = build_model(opt).to(opt.device)
 
 load_dict = torch.load(opt.pretrained, map_location='cpu')['model']
-for k in load_dict.keys():
-if k not in model.state_dict().keys():
-print(f"Skip {k}")
+
 model.load_state_dict(load_dict, strict=False)
 
 return inference(model, opt, composite_image, mask)
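A side note on the unchanged context above: `torch.cuda.reset_max_memory_allocated()` and `torch.cuda.reset_max_memory_cached()` are deprecated aliases on recent PyTorch releases. A hedged sketch (not part of the commit) of an equivalent peak-memory probe built on the non-deprecated API:

```python
# Sketch of a peak-GPU-memory probe; the helper name and print format are illustrative.
import contextlib
import torch

@contextlib.contextmanager
def track_peak_cuda_memory(tag="harmonization"):
    """Report the peak GPU memory allocated inside the with-block."""
    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()
    try:
        yield
    finally:
        if torch.cuda.is_available():
            peak_mb = torch.cuda.max_memory_allocated() / 1024 ** 2
            print(f"[{tag}] peak CUDA memory: {peak_mb:.1f} MB")
```

Wrapping the model call in `with track_peak_cuda_memory(): ...` prints the peak figure once the block finishes.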