Init project

- README.md +59 -0
- data/MBD/MBD.py +110 -0
- data/MBD/MBD_utils.py +291 -0
- data/MBD/infer.py +151 -0
- data/MBD/model/__init__.py +50 -0
- data/MBD/model/cbam.py +95 -0
- data/MBD/model/deep_lab_model/__init__.py +0 -0
- data/MBD/model/deep_lab_model/aspp.py +95 -0
- data/MBD/model/deep_lab_model/backbone/__init__.py +13 -0
- data/MBD/model/deep_lab_model/backbone/drn.py +402 -0
- data/MBD/model/deep_lab_model/backbone/mobilenet.py +151 -0
- data/MBD/model/deep_lab_model/backbone/resnet.py +170 -0
- data/MBD/model/deep_lab_model/backbone/xception.py +288 -0
- data/MBD/model/deep_lab_model/decoder.py +59 -0
- data/MBD/model/deep_lab_model/deeplab.py +81 -0
- data/MBD/model/deep_lab_model/sync_batchnorm/__init__.py +12 -0
- data/MBD/model/deep_lab_model/sync_batchnorm/batchnorm.py +282 -0
- data/MBD/model/deep_lab_model/sync_batchnorm/comm.py +129 -0
- data/MBD/model/deep_lab_model/sync_batchnorm/replicate.py +88 -0
- data/MBD/model/deep_lab_model/sync_batchnorm/unittest.py +29 -0
- data/MBD/model/densenetccnl.py +382 -0
- data/MBD/model/gienet.py +742 -0
- data/MBD/model/unetnc.py +86 -0
- data/MBD/modify_stn_model/stn_head.py +123 -0
- data/MBD/modify_stn_model/tps_spatial_transformer.py +194 -0
- data/MBD/stn_model/stn_head.py +123 -0
- data/MBD/stn_model/tps_spatial_transformer.py +155 -0
- data/MBD/tps_grid_gen.py +70 -0
- data/MBD/utils.py +234 -0
- data/README.md +135 -0
- data/preprocess/crop_merge_image.py +142 -0
- data/preprocess/sauvola_binarize.py +91 -0
- data/preprocess/shadow_extraction.py +68 -0
- eval.py +369 -0
- inference.py +341 -0
- loaders/docres_loader.py +558 -0
- models/restormer_arch.py +308 -0
- requirements.txt +10 -0
- start_train.sh +1 -0
- train.py +221 -0
- utils.py +464 -0
README.md
ADDED
@@ -0,0 +1,59 @@
<div align=center>

# DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks

</div>

<p align="center">
    <img src="images/motivation.jpg" width="400">
</p>

This is the official implementation of our paper [DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks](https://arxiv.org/abs/2405.04408).

## News
🔥 A comprehensive [Recommendation for Document Image Processing](https://github.com/ZZZHANG-jx/Recommendations-Document-Image-Processing) is available.


## Inference
1. Put the MBD model weights [mbd.pkl](https://1drv.ms/f/s!Ak15mSdV3Wy4iahoKckhDPVP5e2Czw?e=iClwdK) in `./data/MBD/checkpoint/`
2. Put the DocRes model weights [docres.pkl](https://1drv.ms/f/s!Ak15mSdV3Wy4iahoKckhDPVP5e2Czw?e=iClwdK) in `./checkpoints/`
3. Run the following script; the results will be saved in `./restorted/`. We have provided some distorted examples in `./input/`.
```bash
python inference.py --im_path ./input/for_dewarping.png --task dewarping --save_dtsprompt 1
```
- `--im_path`: path of the input document image
- `--task`: the task to execute; it must be one of _dewarping_, _deshadowing_, _appearance_, _deblurring_, _binarization_, or _end2end_
- `--save_dtsprompt`: whether to save the DTSPrompt

## Evaluation
1. Dataset preparation, see [dataset instruction](./data/README.md)
2. Put the MBD model weights [mbd.pkl](https://1drv.ms/f/s!Ak15mSdV3Wy4iahoKckhDPVP5e2Czw?e=iClwdK) in `data/MBD/checkpoint/`
3. Put the DocRes model weights [docres.pkl](https://1drv.ms/f/s!Ak15mSdV3Wy4iahoKckhDPVP5e2Czw?e=iClwdK) in `./checkpoints/`
4. Run the following script
```bash
python eval.py --dataset realdae
```
- `--dataset`: the dataset to evaluate; it can be set to _dir300_, _kligler_, _jung_, _osr_, _docunet\_docaligner_, _realdae_, _tdd_, or _dibco18_.

## Training
1. Dataset preparation, see [dataset instruction](./data/README.md)
2. Specify the `datasets_setting` within `train.py` based on your dataset path and experimental setting.
3. Run the following script
```bash
bash start_train.sh
```


## Citation
```
@inproceedings{zhangdocres2024,
  Author = {Jiaxin Zhang, Dezhi Peng, Chongyu Liu, Peirong Zhang and Lianwen Jin},
  Booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  Title = {DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks},
  Year = {2024}}
```
## ⭐ Star Rising
[![Star Rising](https://api.star-history.com/svg?repos=ZZZHANG-jx/DocRes&type=Timeline)](https://star-history.com/#ZZZHANG-jx/DocRes&Timeline)
data/MBD/MBD.py
ADDED
@@ -0,0 +1,110 @@
import cv2
import numpy as np
import MBD_utils
import torch
import torch.nn.functional as F


def mask_base_dewarper(image,mask):
    '''
    input:
        image -> ndarray HxWx3 uint8
        mask  -> ndarray HxW uint8
    return
        dewarped -> ndarray HxWx3 uint8
        grid (optional) -> ndarray HxWx2 -1~1
    '''

    ## get contours
    # _, contours, hierarchy = cv2.findContours(mask,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE) ## cv2.__version__ == 3.x
    contours,hierarchy = cv2.findContours(mask,cv2.RETR_EXTERNAL,method=cv2.CHAIN_APPROX_SIMPLE) ## cv2.__version__ == 4.x

    ## get biggest contour and four corners based on the Douglas-Peucker algorithm
    four_corners, maxArea, contour = MBD_utils.DP_algorithm(contours)
    four_corners = MBD_utils.reorder(four_corners)

    ## reserve the biggest contour and remove other noisy contours
    new_mask = np.zeros_like(mask)
    new_mask = cv2.drawContours(new_mask,[contour],-1,255,cv2.FILLED)

    ## obtain middle points
    # ratios = [0.125,0.25,0.375,0.5,0.625,0.75,0.875]
    ratios = [0.25,0.5,0.75]
    # ratios = [0.0625,0.125,0.1875,0.25,0.3125,0.375,0.4475,0.5,0.5625,0.625,0.06875,0.75,0.8125,0.875,0.9375]
    middle = MBD_utils.findMiddle(corners=four_corners,mask=new_mask,points=ratios)

    ## all points
    source_points = np.concatenate((four_corners,middle),axis=0) ## all_point = four_corners(topleft,topright,bottomleft,bottomright)+top+bottom+left+right

    ## target points
    h,w = image.shape[:2]
    padding = 0
    target_points = [[padding, padding],[w-padding, padding], [padding, h-padding],[w-padding, h-padding]]
    for ratio in ratios:
        target_points.append([int((w-2*padding)*ratio)+padding,padding])
    for ratio in ratios:
        target_points.append([int((w-2*padding)*ratio)+padding,h-padding])
    for ratio in ratios:
        target_points.append([padding,int((h-2*padding)*ratio)+padding])
    for ratio in ratios:
        target_points.append([w-padding,int((h-2*padding)*ratio)+padding])

    ## dewarp based on cv2 (alternative)
    # pts1 = np.float32(source_points)
    # pts2 = np.float32(target_points)
    # tps = cv2.createThinPlateSplineShapeTransformer()
    # matches = []
    # N = pts1.shape[0]
    # for i in range(0,N):
    #     matches.append(cv2.DMatch(i,i,0))
    # pts1 = pts1.reshape(1,-1,2)
    # pts2 = pts2.reshape(1,-1,2)
    # tps.estimateTransformation(pts2,pts1,matches)
    # dewarped = tps.warpImage(image)

    ## dewarp based on a generated grid
    source_points = source_points.reshape(-1,2)/np.array([image.shape[:2][::-1]]).reshape(1,2)
    source_points = torch.from_numpy(source_points).float().cuda()
    source_points = source_points.unsqueeze(0)
    source_points = (source_points-0.5)*2
    target_points = np.asarray(target_points).reshape(-1,2)/np.array([image.shape[:2][::-1]]).reshape(1,2)
    target_points = torch.from_numpy(target_points).float()
    target_points = (target_points-0.5)*2

    model = MBD_utils.TPSGridGen(target_height=256,target_width=256,target_control_points=target_points)
    model = model.cuda()
    grid = model(source_points).view(-1,256,256,2).permute(0,3,1,2)
    grid = F.interpolate(grid,(h,w),mode='bilinear').permute(0,2,3,1)
    dewarped = MBD_utils.torch2cvimg(F.grid_sample(MBD_utils.cvimg2torch(image).cuda(),grid))[0]
    return dewarped,grid[0].cpu().numpy()

def mask_base_cropper(image,mask):
    '''
    input:
        image -> ndarray HxWx3 uint8
        mask  -> ndarray HxW uint8
    return
        dewarped -> ndarray HxWx3 uint8
        grid (optional) -> ndarray HxWx2 -1~1
    '''

    ## get contours
    _, contours, hierarchy = cv2.findContours(mask,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE) ## cv2.__version__ == 3.x
    # contours,hierarchy = cv2.findContours(mask,cv2.RETR_EXTERNAL,method=cv2.CHAIN_APPROX_SIMPLE) ## cv2.__version__ == 4.x

    ## get biggest contour and four corners based on the Douglas-Peucker algorithm
    four_corners, maxArea, contour = MBD_utils.DP_algorithm(contours)
    four_corners = MBD_utils.reorder(four_corners)

    ## reserve the biggest contour and remove other noisy contours
    new_mask = np.zeros_like(mask)
    new_mask = cv2.drawContours(new_mask,[contour],-1,255,cv2.FILLED)

    ## minimum-area bounding rectangle
    rect = cv2.minAreaRect(contour)  # (center(x,y), (width,height), rotation angle) of the minimum-area rectangle
    box = cv2.boxPoints(rect)        # the 4 corner points of the minimum-area rectangle (cv2.boxPoints for OpenCV 3.x)
    box = np.int0(box)
    box = box.reshape((4,1,2))
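A minimal usage sketch of `mask_base_dewarper` (not part of the commit; the file names are hypothetical, a CUDA device is required, and the mask is expected to be a clean 0/255 document mask such as the one produced in `infer.py`):

```python
import cv2
from MBD import mask_base_dewarper

# hypothetical file names: a distorted document photo and its binary (0/255) document mask
image = cv2.imread('warped_doc.png')                              # HxWx3 uint8
mask = cv2.imread('warped_doc_mask.png', cv2.IMREAD_GRAYSCALE)    # HxW uint8

dewarped, grid = mask_base_dewarper(image, mask)   # grid is HxWx2 in the -1~1 range
cv2.imwrite('warped_doc_dewarped.png', dewarped)
```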
data/MBD/MBD_utils.py
ADDED
@@ -0,0 +1,291 @@
import cv2
import numpy as np
import copy
import torch
import itertools
import torch.nn as nn
from torch.autograd import Function, Variable

def reorder(myPoints):
    myPoints = myPoints.reshape((4, 2))
    myPointsNew = np.zeros((4, 1, 2), dtype=np.int32)
    add = myPoints.sum(1)
    myPointsNew[0] = myPoints[np.argmin(add)]
    myPointsNew[3] = myPoints[np.argmax(add)]
    diff = np.diff(myPoints, axis=1)
    myPointsNew[1] = myPoints[np.argmin(diff)]
    myPointsNew[2] = myPoints[np.argmax(diff)]
    return myPointsNew


def findMiddle(corners,mask,points=[0.25,0.5,0.75]):
    num_middle_points = len(points)
    top = [np.array([])]*num_middle_points
    bottom = [np.array([])]*num_middle_points
    left = [np.array([])]*num_middle_points
    right = [np.array([])]*num_middle_points

    center_top = []
    center_bottom = []
    center_left = []
    center_right = []

    center = (int((corners[0][0][1]+corners[3][0][1])/2),int((corners[0][0][0]+corners[3][0][0])/2))
    for ratio in points:
        center_top.append( (center[0],int(corners[0][0][0]*(1-ratio)+corners[1][0][0]*ratio)) )
        center_bottom.append( (center[0],int(corners[2][0][0]*(1-ratio)+corners[3][0][0]*ratio)) )
        center_left.append( (int(corners[0][0][1]*(1-ratio)+corners[2][0][1]*ratio),center[1]) )
        center_right.append( (int(corners[1][0][1]*(1-ratio)+corners[3][0][1]*ratio),center[1]) )

    for i in range(0,center[0],1):
        for j in range(num_middle_points):
            if top[j].size==0:
                if mask[i,center_top[j][1]]==255:
                    top[j] = np.asarray([center_top[j][1],i])
                    top[j] = top[j].reshape(1,2)

    for i in range(mask.shape[0]-1,center[0],-1):
        for j in range(num_middle_points):
            if bottom[j].size==0:
                if mask[i,center_bottom[j][1]]==255:
                    bottom[j] = np.asarray([center_bottom[j][1],i])
                    bottom[j] = bottom[j].reshape(1,2)

    for i in range(mask.shape[1]-1,center[1],-1):
        for j in range(num_middle_points):
            if right[j].size==0:
                if mask[center_right[j][0],i]==255:
                    right[j] = np.asarray([i,center_right[j][0]])
                    right[j] = right[j].reshape(1,2)

    for i in range(0,center[1]):
        for j in range(num_middle_points):
            if left[j].size==0:
                if mask[center_left[j][0],i]==255:
                    left[j] = np.asarray([i,center_left[j][0]])
                    left[j] = left[j].reshape(1,2)

    return np.asarray(top+bottom+left+right)

def DP_algorithmv1(contours):
    biggest = np.array([])
    max_area = 0
    step = 0.001
    count = 0
    # while biggest.size==0:
    while True:
        for i in contours:
            # print(i.shape)
            area = cv2.contourArea(i)
            # print(area,cv2.arcLength(i, True))
            if area > cv2.arcLength(i, True)*10:
                peri = cv2.arcLength(i, True)
                approx = cv2.approxPolyDP(i, (0.01+step*count) * peri, True)
                if area > max_area and len(approx) == 4:
                    max_area = area
                    biggest_contours = i
                    biggest = approx
                    break
        if abs(max_area - cv2.contourArea(biggest))/max_area > 0.3:
            biggest = np.array([])
        count += 1
        if count > 200:
            break
    temp = biggest[0]
    return biggest, max_area, biggest_contours

def DP_algorithm(contours):
    biggest = np.array([])
    max_area = 0
    step = 0.001
    count = 0

    ### largest contour
    for i in contours:
        area = cv2.contourArea(i)
        if area > max_area:
            max_area = area
            biggest_contours = i
    peri = cv2.arcLength(biggest_contours, True)

    ### find four corners
    while True:
        approx = cv2.approxPolyDP(biggest_contours, (0.01+step*count) * peri, True)
        if len(approx) == 4:
            biggest = approx
            break
        # if abs(max_area - cv2.contourArea(biggest))/max_area > 0.2:
        # if abs(max_area - cv2.contourArea(biggest))/max_area > 0.4:
        #     biggest = np.array([])
        count += 1
        if count > 200:
            break
    return biggest, max_area, biggest_contours

def drawRectangle(img,biggest,color,thickness):
    cv2.line(img, (biggest[0][0][0], biggest[0][0][1]), (biggest[1][0][0], biggest[1][0][1]), color, thickness)
    cv2.line(img, (biggest[0][0][0], biggest[0][0][1]), (biggest[2][0][0], biggest[2][0][1]), color, thickness)
    cv2.line(img, (biggest[3][0][0], biggest[3][0][1]), (biggest[2][0][0], biggest[2][0][1]), color, thickness)
    cv2.line(img, (biggest[3][0][0], biggest[3][0][1]), (biggest[1][0][0], biggest[1][0][1]), color, thickness)
    return img

def minAreaRect(contours,img):
    # biggest = np.array([])
    max_area = 0
    for i in contours:
        area = cv2.contourArea(i)
        if area > max_area:
            peri = cv2.arcLength(i, True)
            rect = cv2.minAreaRect(i)
            points = cv2.boxPoints(rect)
            max_area = area
    return points

def cropRectangle(img,biggest):
    # print(biggest)
    w = np.abs(biggest[0][0][0] - biggest[1][0][0])
    h = np.abs(biggest[0][0][1] - biggest[2][0][1])
    new_img = np.zeros((w,h,img.shape[-1]),dtype=np.uint8)
    new_img = img[biggest[0][0][1]:biggest[0][0][1]+h,biggest[0][0][0]:biggest[0][0][0]+w]
    return new_img

def cvimg2torch(img,min=0,max=1):
    '''
    input:
        im -> ndarray uint8 HxWxC
    return
        tensor -> torch.tensor BxCxHxW
    '''
    if len(img.shape)==2:
        img = np.expand_dims(img,axis=-1)
    img = img.astype(float) / 255.0
    img = img.transpose(2, 0, 1) # HWC -> CHW
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()
    return img

def torch2cvimg(tensor,min=0,max=1):
    '''
    input:
        tensor -> torch.tensor BxCxHxW C can be 1,3
    return
        im -> ndarray uint8 HxWxC
    '''
    im_list = []
    for i in range(tensor.shape[0]):
        im = tensor.detach().cpu().data.numpy()[i]
        im = im.transpose(1,2,0)
        im = np.clip(im,min,max)
        im = ((im-min)/(max-min)*255).astype(np.uint8)
        im_list.append(im)
    return im_list



class TPSGridGen(nn.Module):
    def __init__(self, target_height, target_width, target_control_points):
        '''
        target_control_points -> torch.tensor num_pointx2 -1~1
        source_control_points -> torch.tensor batch_size x num_point x 2 -1~1
        return:
            grid -> batch_size x hw x 2 -1~1
        '''
        super(TPSGridGen, self).__init__()
        assert target_control_points.ndimension() == 2
        assert target_control_points.size(1) == 2
        N = target_control_points.size(0)
        self.num_points = N
        target_control_points = target_control_points.float()

        # create padded kernel matrix
        forward_kernel = torch.zeros(N + 3, N + 3)
        target_control_partial_repr = self.compute_partial_repr(target_control_points, target_control_points)
        forward_kernel[:N, :N].copy_(target_control_partial_repr)
        forward_kernel[:N, -3].fill_(1)
        forward_kernel[-3, :N].fill_(1)
        forward_kernel[:N, -2:].copy_(target_control_points)
        forward_kernel[-2:, :N].copy_(target_control_points.transpose(0, 1))
        # compute inverse matrix
        inverse_kernel = torch.inverse(forward_kernel)

        # create target coordinate matrix
        HW = target_height * target_width
        target_coordinate = list(itertools.product(range(target_height), range(target_width)))
        target_coordinate = torch.Tensor(target_coordinate) # HW x 2
        Y, X = target_coordinate.split(1, dim = 1)
        Y = Y * 2 / (target_height - 1) - 1
        X = X * 2 / (target_width - 1) - 1
        target_coordinate = torch.cat([X, Y], dim = 1) # convert from (y, x) to (x, y)
        target_coordinate_partial_repr = self.compute_partial_repr(target_coordinate.to(target_control_points.device), target_control_points)
        target_coordinate_repr = torch.cat([
            target_coordinate_partial_repr, torch.ones(HW, 1), target_coordinate
        ], dim = 1)

        # register precomputed matrices
        self.register_buffer('inverse_kernel', inverse_kernel)
        self.register_buffer('padding_matrix', torch.zeros(3, 2))
        self.register_buffer('target_coordinate_repr', target_coordinate_repr)

    def forward(self, source_control_points):
        assert source_control_points.ndimension() == 3
        assert source_control_points.size(1) == self.num_points
        assert source_control_points.size(2) == 2
        batch_size = source_control_points.size(0)

        Y = torch.cat([source_control_points, Variable(self.padding_matrix.expand(batch_size, 3, 2))], 1)
        mapping_matrix = torch.matmul(Variable(self.inverse_kernel), Y)
        source_coordinate = torch.matmul(Variable(self.target_coordinate_repr), mapping_matrix)
        return source_coordinate

    # phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2
    def compute_partial_repr(self, input_points, control_points):
        N = input_points.size(0)
        M = control_points.size(0)
        pairwise_diff = input_points.view(N, 1, 2) - control_points.view(1, M, 2)
        # original implementation, very slow
        # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance
        pairwise_diff_square = pairwise_diff * pairwise_diff
        pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, 1]
        repr_matrix = 0.5 * pairwise_dist * torch.log(pairwise_dist)
        # fix numerical error for 0 * log(0), substitute all nan with 0
        mask = repr_matrix != repr_matrix
        repr_matrix.masked_fill_(mask, 0)
        return repr_matrix




### decide whether further processing is needed
# point_area = cv2.contourArea(np.concatenate((biggest_angle[0].reshape(1,1,2),middle[0:3],biggest_angle[1].reshape(1,1,2),middle[9:12],biggest_angle[3].reshape(1,1,2),middle[3:6][::-1],biggest_angle[2].reshape(1,1,2),middle[6:9][::-1]),axis=0))
#### minimum-area bounding rectangle
# rect = cv2.minAreaRect(contour) # (center(x,y), (width,height), rotation angle) of the minimum-area rectangle
# box = cv2.boxPoints(rect) # cv2.boxPoints(rect) for OpenCV 3.x; the 4 corner points of the rectangle
# box = np.int0(box)
# box = box.reshape((4,1,2))
# minrect_area = cv2.contourArea(box)
# print(abs(minrect_area-point_area)/point_area)
#### IOU of the four corner points
# biggest_box = np.concatenate((biggest_angle[0,:,:].reshape(1,1,2),biggest_angle[2,:,:].reshape(1,1,2),biggest_angle[3,:,:].reshape(1,1,2),biggest_angle[1,:,:].reshape(1,1,2)),axis=0)
# biggest_mask = np.zeros_like(mask)
# # corner_area = cv2.contourArea(biggest_box)
# cv2.drawContours(biggest_mask,[biggest_box], -1, color=255, thickness=-1)

# smooth = 1e-5
# biggest_mask_ = biggest_mask > 50
# mask_ = mask > 50
# intersection = (biggest_mask_ & mask_).sum()
# union = (biggest_mask_ | mask_).sum()
# iou = (intersection + smooth) / (union + smooth)
# if iou > 0.975:
#     skip = True
# else:
#     skip = False
# print(iou)
# cv2.imshow('mask',cv2.resize(mask,(512,512)))
# cv2.imshow('biggest_mask',cv2.resize(biggest_mask,(512,512)))
# cv2.waitKey(0)
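A small sanity-check sketch for `TPSGridGen` (not part of the commit): when the source control points coincide with the target control points, the thin-plate-spline solution is the identity mapping, so the generated grid resamples the input almost unchanged.

```python
import torch
import torch.nn.functional as F
from MBD_utils import TPSGridGen

# four corner control points in the normalized -1~1 coordinate system
target_pts = torch.tensor([[-1., -1.], [1., -1.], [-1., 1.], [1., 1.]])
tps = TPSGridGen(target_height=256, target_width=256, target_control_points=target_pts)

# source == target control points -> identity warp
grid = tps(target_pts.unsqueeze(0)).view(1, 256, 256, 2)

img = torch.rand(1, 3, 256, 256)
warped = F.grid_sample(img, grid, align_corners=True)
print((warped - img).abs().max())  # should be close to zero
```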
data/MBD/infer.py
ADDED
@@ -0,0 +1,151 @@
import torch
import argparse
import numpy as np
import torch.nn.functional as F
import glob
import cv2
from tqdm import tqdm

import time
import os
from model.deep_lab_model.deeplab import *
from MBD import mask_base_dewarper

from utils import cvimg2torch,torch2cvimg



def net1_net2_infer(model,img_paths,args):

    ### validate on the real datasets
    seg_model=model
    seg_model.eval()
    for img_path in tqdm(img_paths):
        if os.path.exists(img_path.replace('_origin','_capture')):
            continue
        t1 = time.time()
        ### segmentation mask predict
        img_org = cv2.imread(img_path)
        h_org,w_org = img_org.shape[:2]
        img = cv2.resize(img_org,(448, 448))
        img = cv2.GaussianBlur(img,(15,15),0,0)
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        img = cvimg2torch(img)

        with torch.no_grad():
            pred = seg_model(img.cuda())
            mask_pred = pred[:,0,:,:].unsqueeze(1)
            mask_pred = F.interpolate(mask_pred,(h_org,w_org))
            mask_pred = mask_pred.squeeze(0).squeeze(0).cpu().numpy()
            mask_pred = (mask_pred*255).astype(np.uint8)
            kernel = np.ones((3,3))
            mask_pred = cv2.dilate(mask_pred,kernel,iterations=3)
            mask_pred = cv2.erode(mask_pred,kernel,iterations=3)
            mask_pred[mask_pred>100] = 255
            mask_pred[mask_pred<100] = 0
        ### tps transform base on the mask
        # dewarp, grid = mask_base_dewarper(img_org,mask_pred)
        try:
            dewarp, grid = mask_base_dewarper(img_org,mask_pred)
        except:
            print('fail')
            grid = np.meshgrid(np.arange(w_org),np.arange(h_org))/np.array([w_org,h_org]).reshape(2,1,1)
            grid = torch.from_numpy((grid-0.5)*2).float().unsqueeze(0).permute(0,2,3,1)
            dewarp = torch2cvimg(F.grid_sample(cvimg2torch(img_org),grid))[0]
            grid = grid[0].numpy()
        # cv2.imshow('in',cv2.resize(img_org,(512,512)))
        # cv2.imshow('out',cv2.resize(dewarp,(512,512)))
        # cv2.waitKey(0)
        cv2.imwrite(img_path.replace('_origin','_capture'),dewarp)
        cv2.imwrite(img_path.replace('_origin','_mask_new'),mask_pred)

        grid0 = cv2.resize(grid[:,:,0],(128,128))
        grid1 = cv2.resize(grid[:,:,1],(128,128))
        grid = np.stack((grid0,grid1),axis=-1)
        np.save(img_path.replace('_origin','_grid1'),grid)


def net1_net2_infer_single_im(img,model_path):
    seg_model = DeepLab(num_classes=1,
                    backbone='resnet',
                    output_stride=16,
                    sync_bn=None,
                    freeze_bn=False)
    seg_model = torch.nn.DataParallel(seg_model, device_ids=range(torch.cuda.device_count()))
    seg_model.cuda()
    checkpoint = torch.load(model_path)
    seg_model.load_state_dict(checkpoint['model_state'])
    ### validate on the real datasets
    seg_model.eval()
    ### segmentation mask predict
    img_org = img
    h_org,w_org = img_org.shape[:2]
    img = cv2.resize(img_org,(448, 448))
    img = cv2.GaussianBlur(img,(15,15),0,0)
    img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    img = cvimg2torch(img)

    with torch.no_grad():
        # from torchtoolbox.tools import summary
        # print(summary(seg_model,torch.rand((1, 3, 448, 448)).cuda())) 59.4M 135.6G

        pred = seg_model(img.cuda())
        mask_pred = pred[:,0,:,:].unsqueeze(1)
        mask_pred = F.interpolate(mask_pred,(h_org,w_org))
        mask_pred = mask_pred.squeeze(0).squeeze(0).cpu().numpy()
        mask_pred = (mask_pred*255).astype(np.uint8)
        kernel = np.ones((3,3))
        mask_pred = cv2.dilate(mask_pred,kernel,iterations=3)
        mask_pred = cv2.erode(mask_pred,kernel,iterations=3)
        mask_pred[mask_pred>100] = 255
        mask_pred[mask_pred<100] = 0
    ### tps transform base on the mask
    # dewarp, grid = mask_base_dewarper(img_org,mask_pred)
    # try:
    #     dewarp, grid = mask_base_dewarper(img_org,mask_pred)
    # except:
    #     print('fail')
    #     grid = np.meshgrid(np.arange(w_org),np.arange(h_org))/np.array([w_org,h_org]).reshape(2,1,1)
    #     grid = torch.from_numpy((grid-0.5)*2).float().unsqueeze(0).permute(0,2,3,1)
    #     dewarp = torch2cvimg(F.grid_sample(cvimg2torch(img_org),grid))[0]
    #     grid = grid[0].numpy()
    # cv2.imshow('in',cv2.resize(img_org,(512,512)))
    # cv2.imshow('out',cv2.resize(dewarp,(512,512)))
    # cv2.waitKey(0)
    # cv2.imwrite(img_path.replace('_origin','_capture'),dewarp)
    # cv2.imwrite(img_path.replace('_origin','_mask_new'),mask_pred)

    # grid0 = cv2.resize(grid[:,:,0],(128,128))
    # grid1 = cv2.resize(grid[:,:,1],(128,128))
    # grid = np.stack((grid0,grid1),axis=-1)
    # np.save(img_path.replace('_origin','_grid1'),grid)
    return mask_pred



if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Hyperparams')
    parser.add_argument('--img_folder', nargs='?', type=str, default='./all_data',help='Data path to load data')
    parser.add_argument('--img_rows', nargs='?', type=int, default=448,
                        help='Height of the input image')
    parser.add_argument('--img_cols', nargs='?', type=int, default=448,
                        help='Width of the input image')
    parser.add_argument('--seg_model_path', nargs='?', type=str, default='checkpoints/mbd.pkl',
                        help='Path to previous saved model to restart from')
    args = parser.parse_args()

    seg_model = DeepLab(num_classes=1,
                    backbone='resnet',
                    output_stride=16,
                    sync_bn=None,
                    freeze_bn=False)
    seg_model = torch.nn.DataParallel(seg_model, device_ids=range(torch.cuda.device_count()))
    seg_model.cuda()
    checkpoint = torch.load(args.seg_model_path)
    seg_model.load_state_dict(checkpoint['model_state'])

    im_paths = glob.glob(os.path.join(args.img_folder,'*_origin.*'))

    net1_net2_infer(seg_model,im_paths,args)
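A sketch of the single-image path combined with the dewarper (not part of the commit; assumes it is run from `data/MBD/`, that a CUDA device is available, and that the released `mbd.pkl` is placed as described in the README):

```python
import cv2
from infer import net1_net2_infer_single_im
from MBD import mask_base_dewarper

img = cv2.imread('warped_doc.png')  # hypothetical input photo, BGR uint8
mask = net1_net2_infer_single_im(img, 'checkpoint/mbd.pkl')  # HxW uint8 document mask
dewarped, _ = mask_base_dewarper(img, mask)
cv2.imwrite('warped_doc_dewarped.png', dewarped)
```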
data/MBD/model/__init__.py
ADDED
@@ -0,0 +1,50 @@
import torchvision.models as models
from model.densenetccnl import *
from model.unetnc import *
from model.gienet import *


def get_model(name, n_classes=1, filters=64,version=None,in_channels=3, is_batchnorm=True, norm='batch', model_path=None, use_sigmoid=True, layers=3,img_size=512):
    model = _get_model_instance(name)


    if name == 'dnetccnl':
        model = model(img_size=128, in_channels=in_channels, out_channels=n_classes, filters=32)
    elif name == 'dnetccnl512':
        model = model(img_size=img_size, in_channels=in_channels, out_channels=n_classes, filters=32)
    elif name == 'unetnc':
        model = model(input_nc=in_channels, output_nc=n_classes, num_downs=7)
    elif name == 'gie':
        model = model(input_nc=in_channels, output_nc=n_classes, num_downs=7)
    elif name == 'giecbam':
        model = model(input_nc=in_channels, output_nc=n_classes, num_downs=7)
    elif name == 'gie2head':
        model = model(input_nc=in_channels, output_nc=n_classes, num_downs=7)
    elif name == 'giemask':
        model = model(input_nc=in_channels, output_nc=n_classes, num_downs=7)
    elif name == 'giemask2':
        model = model(input_nc=in_channels, output_nc=n_classes, num_downs=7)
    elif name == 'giedilated':
        model = model(input_nc=in_channels, output_nc=n_classes, num_downs=7)
    elif name == 'bmp':
        model = model(input_nc=in_channels, output_nc=n_classes, num_downs=7)
    elif name == 'displacement':
        model = model(n_classes=2, num_filter=32, BatchNorm='GN', in_channels=5)
    return model

def _get_model_instance(name):
    try:
        return {
            'dnetccnl': dnetccnl,
            'dnetccnl512': dnetccnl512,
            'unetnc': UnetGenerator,
            'gie':GieGenerator,
            'giecbam':GiecbamGenerator,
            'giedilated':DilatedSingleUnet,
            'gie2head':Gie2headGenerator,
            'giemask':GiemaskGenerator,
            'giemask2':Giemask2Generator,
            'bmp':BmpGenerator,
        }[name]
    except:
        print('Model {} not available'.format(name))
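A short sketch of the factory above (not part of the commit; assumes it is run from `data/MBD/` so that the `model` package is importable):

```python
from model import get_model

# 'unetnc' resolves to UnetGenerator(input_nc=3, output_nc=1, num_downs=7) via the registry above
net = get_model('unetnc', n_classes=1, in_channels=3)
print(sum(p.numel() for p in net.parameters()))  # rough parameter-count check
```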
data/MBD/model/cbam.py
ADDED
@@ -0,0 +1,95 @@
import torch
import math
import torch.nn as nn
import torch.nn.functional as F

class BasicConv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes,eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

class ChannelGate(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
            )
        self.pool_types = pool_types
    def forward(self, x):
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type=='avg':
                avg_pool = F.avg_pool2d( x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( avg_pool )
            elif pool_type=='max':
                max_pool = F.max_pool2d( x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( max_pool )
            elif pool_type=='lp':
                lp_pool = F.lp_pool2d( x, 2, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp( lp_pool )
            elif pool_type=='lse':
                # LSE pool only
                lse_pool = logsumexp_2d(x)
                channel_att_raw = self.mlp( lse_pool )

            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw

        scale = F.sigmoid( channel_att_sum ).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scale

def logsumexp_2d(tensor):
    tensor_flatten = tensor.view(tensor.size(0), tensor.size(1), -1)
    s, _ = torch.max(tensor_flatten, dim=2, keepdim=True)
    outputs = s + (tensor_flatten - s).exp().sum(dim=2, keepdim=True).log()
    return outputs

class ChannelPool(nn.Module):
    def forward(self, x):
        return torch.cat( (torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 )

class SpatialGate(nn.Module):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size-1) // 2, relu=False)
    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = F.sigmoid(x_out) # broadcasting
        return x * scale

class CBAM(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial=no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()
    def forward(self, x):
        x_out = self.ChannelGate(x)
        if not self.no_spatial:
            x_out = self.SpatialGate(x_out)
        return x_out
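A tiny sketch of applying the CBAM block to a feature map (not part of the commit; shapes are illustrative):

```python
import torch
from model.cbam import CBAM

attn = CBAM(gate_channels=64, reduction_ratio=16)  # channel attention followed by spatial attention
feat = torch.rand(2, 64, 32, 32)                   # BxCxHxW feature map
out = attn(feat)                                   # same shape, re-weighted by the attention maps
assert out.shape == feat.shape
```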
data/MBD/model/deep_lab_model/__init__.py
ADDED
File without changes
data/MBD/model/deep_lab_model/aspp.py
ADDED
@@ -0,0 +1,95 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d

class _ASPPModule(nn.Module):
    def __init__(self, inplanes, planes, kernel_size, padding, dilation, BatchNorm):
        super(_ASPPModule, self).__init__()
        self.atrous_conv = nn.Conv2d(inplanes, planes, kernel_size=kernel_size,
                                     stride=1, padding=padding, dilation=dilation, bias=False)
        self.bn = BatchNorm(planes)
        self.relu = nn.ReLU()

        self._init_weight()

    def forward(self, x):
        x = self.atrous_conv(x)
        x = self.bn(x)

        return self.relu(x)

    def _init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, SynchronizedBatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

class ASPP(nn.Module):
    def __init__(self, backbone, output_stride, BatchNorm):
        super(ASPP, self).__init__()
        if backbone == 'drn':
            inplanes = 512
        elif backbone == 'mobilenet':
            inplanes = 320
        else:
            inplanes = 2048
        if output_stride == 16:
            dilations = [1, 6, 12, 18]
        elif output_stride == 8:
            dilations = [1, 12, 24, 36]
        else:
            raise NotImplementedError

        self.aspp1 = _ASPPModule(inplanes, 256, 1, padding=0, dilation=dilations[0], BatchNorm=BatchNorm)
        self.aspp2 = _ASPPModule(inplanes, 256, 3, padding=dilations[1], dilation=dilations[1], BatchNorm=BatchNorm)
        self.aspp3 = _ASPPModule(inplanes, 256, 3, padding=dilations[2], dilation=dilations[2], BatchNorm=BatchNorm)
        self.aspp4 = _ASPPModule(inplanes, 256, 3, padding=dilations[3], dilation=dilations[3], BatchNorm=BatchNorm)

        self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                             nn.Conv2d(inplanes, 256, 1, stride=1, bias=False),
                                             BatchNorm(256),
                                             nn.ReLU())
        self.conv1 = nn.Conv2d(1280, 256, 1, bias=False)
        self.bn1 = BatchNorm(256)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self._init_weight()

    def forward(self, x):
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        return self.dropout(x)

    def _init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, SynchronizedBatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()


def build_aspp(backbone, output_stride, BatchNorm):
    return ASPP(backbone, output_stride, BatchNorm)
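A brief sketch of building the ASPP head on its own (not part of the commit; the 2048 input channels follow the `resnet` branch above, and the 32×32 spatial size is illustrative):

```python
import torch
import torch.nn as nn
from model.deep_lab_model.aspp import build_aspp

aspp = build_aspp(backbone='resnet', output_stride=16, BatchNorm=nn.BatchNorm2d)
aspp.eval()  # eval mode so the 1x1 global-pooling branch's BatchNorm uses running statistics
feat = torch.rand(1, 2048, 32, 32)  # simulated backbone output
out = aspp(feat)                    # 1 x 256 x 32 x 32 fused multi-scale features
```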
data/MBD/model/deep_lab_model/backbone/__init__.py
ADDED
@@ -0,0 +1,13 @@
from model.deep_lab_model.backbone import resnet, xception, drn, mobilenet

def build_backbone(backbone, output_stride, BatchNorm):
    if backbone == 'resnet':
        return resnet.ResNet101(output_stride, BatchNorm)
    elif backbone == 'xception':
        return xception.AlignedXception(output_stride, BatchNorm)
    elif backbone == 'drn':
        return drn.drn_d_54(BatchNorm)
    elif backbone == 'mobilenet':
        return mobilenet.MobileNetV2(output_stride, BatchNorm)
    else:
        raise NotImplementedError
data/MBD/model/deep_lab_model/backbone/drn.py
ADDED
@@ -0,0 +1,402 @@
1 |
+
import torch.nn as nn
|
2 |
+
import math
|
3 |
+
import torch.utils.model_zoo as model_zoo
|
4 |
+
from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
|
5 |
+
|
6 |
+
webroot = 'http://dl.yf.io/drn/'
|
7 |
+
|
8 |
+
model_urls = {
|
9 |
+
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
|
10 |
+
'drn-c-26': webroot + 'drn_c_26-ddedf421.pth',
|
11 |
+
'drn-c-42': webroot + 'drn_c_42-9d336e8c.pth',
|
12 |
+
'drn-c-58': webroot + 'drn_c_58-0a53a92c.pth',
|
13 |
+
'drn-d-22': webroot + 'drn_d_22-4bd2f8ea.pth',
|
14 |
+
'drn-d-38': webroot + 'drn_d_38-eebb45f0.pth',
|
15 |
+
'drn-d-54': webroot + 'drn_d_54-0e0534ff.pth',
|
16 |
+
'drn-d-105': webroot + 'drn_d_105-12b40979.pth'
|
17 |
+
}
|
18 |
+
|
19 |
+
|
20 |
+
def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1):
|
21 |
+
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
|
22 |
+
padding=padding, bias=False, dilation=dilation)
|
23 |
+
|
24 |
+
|
25 |
+
class BasicBlock(nn.Module):
|
26 |
+
expansion = 1
|
27 |
+
|
28 |
+
def __init__(self, inplanes, planes, stride=1, downsample=None,
|
29 |
+
dilation=(1, 1), residual=True, BatchNorm=None):
|
30 |
+
super(BasicBlock, self).__init__()
|
31 |
+
self.conv1 = conv3x3(inplanes, planes, stride,
|
32 |
+
padding=dilation[0], dilation=dilation[0])
|
33 |
+
self.bn1 = BatchNorm(planes)
|
34 |
+
self.relu = nn.ReLU(inplace=True)
|
35 |
+
self.conv2 = conv3x3(planes, planes,
|
36 |
+
padding=dilation[1], dilation=dilation[1])
|
37 |
+
self.bn2 = BatchNorm(planes)
|
38 |
+
self.downsample = downsample
|
39 |
+
self.stride = stride
|
40 |
+
self.residual = residual
|
41 |
+
|
42 |
+
def forward(self, x):
|
43 |
+
residual = x
|
44 |
+
|
45 |
+
out = self.conv1(x)
|
46 |
+
out = self.bn1(out)
|
47 |
+
out = self.relu(out)
|
48 |
+
|
49 |
+
out = self.conv2(out)
|
50 |
+
out = self.bn2(out)
|
51 |
+
|
52 |
+
if self.downsample is not None:
|
53 |
+
residual = self.downsample(x)
|
54 |
+
if self.residual:
|
55 |
+
out += residual
|
56 |
+
out = self.relu(out)
|
57 |
+
|
58 |
+
return out
|
59 |
+
|
60 |
+
|
61 |
+
class Bottleneck(nn.Module):
|
62 |
+
expansion = 4
|
63 |
+
|
64 |
+
def __init__(self, inplanes, planes, stride=1, downsample=None,
|
65 |
+
dilation=(1, 1), residual=True, BatchNorm=None):
|
66 |
+
super(Bottleneck, self).__init__()
|
67 |
+
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
|
68 |
+
self.bn1 = BatchNorm(planes)
|
69 |
+
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
|
70 |
+
padding=dilation[1], bias=False,
|
71 |
+
dilation=dilation[1])
|
72 |
+
self.bn2 = BatchNorm(planes)
|
73 |
+
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
|
74 |
+
self.bn3 = BatchNorm(planes * 4)
|
75 |
+
self.relu = nn.ReLU(inplace=True)
|
76 |
+
self.downsample = downsample
|
77 |
+
self.stride = stride
|
78 |
+
|
79 |
+
def forward(self, x):
|
80 |
+
residual = x
|
81 |
+
|
82 |
+
out = self.conv1(x)
|
83 |
+
out = self.bn1(out)
|
84 |
+
out = self.relu(out)
|
85 |
+
|
86 |
+
out = self.conv2(out)
|
87 |
+
out = self.bn2(out)
|
88 |
+
out = self.relu(out)
|
89 |
+
|
90 |
+
out = self.conv3(out)
|
91 |
+
out = self.bn3(out)
|
92 |
+
|
93 |
+
if self.downsample is not None:
|
94 |
+
residual = self.downsample(x)
|
95 |
+
|
96 |
+
out += residual
|
97 |
+
out = self.relu(out)
|
98 |
+
|
99 |
+
return out
|
100 |
+
|
101 |
+
|
102 |
+
class DRN(nn.Module):
|
103 |
+
|
104 |
+
def __init__(self, block, layers, arch='D',
|
105 |
+
channels=(16, 32, 64, 128, 256, 512, 512, 512),
|
106 |
+
BatchNorm=None):
|
107 |
+
super(DRN, self).__init__()
|
108 |
+
self.inplanes = channels[0]
|
109 |
+
self.out_dim = channels[-1]
|
110 |
+
self.arch = arch
|
111 |
+
|
112 |
+
if arch == 'C':
|
113 |
+
self.conv1 = nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
|
114 |
+
padding=3, bias=False)
|
115 |
+
self.bn1 = BatchNorm(channels[0])
|
116 |
+
self.relu = nn.ReLU(inplace=True)
|
117 |
+
|
118 |
+
self.layer1 = self._make_layer(
|
119 |
+
BasicBlock, channels[0], layers[0], stride=1, BatchNorm=BatchNorm)
|
120 |
+
self.layer2 = self._make_layer(
|
121 |
+
BasicBlock, channels[1], layers[1], stride=2, BatchNorm=BatchNorm)
|
122 |
+
|
123 |
+
elif arch == 'D':
|
124 |
+
self.layer0 = nn.Sequential(
|
125 |
+
nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3,
|
126 |
+
bias=False),
|
127 |
+
BatchNorm(channels[0]),
|
128 |
+
nn.ReLU(inplace=True)
|
129 |
+
)
|
130 |
+
|
131 |
+
self.layer1 = self._make_conv_layers(
|
132 |
+
channels[0], layers[0], stride=1, BatchNorm=BatchNorm)
|
133 |
+
self.layer2 = self._make_conv_layers(
|
134 |
+
channels[1], layers[1], stride=2, BatchNorm=BatchNorm)
|
135 |
+
|
136 |
+
self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2, BatchNorm=BatchNorm)
|
137 |
+
self.layer4 = self._make_layer(block, channels[3], layers[3], stride=2, BatchNorm=BatchNorm)
|
138 |
+
self.layer5 = self._make_layer(block, channels[4], layers[4],
|
139 |
+
dilation=2, new_level=False, BatchNorm=BatchNorm)
|
140 |
+
self.layer6 = None if layers[5] == 0 else \
|
141 |
+
self._make_layer(block, channels[5], layers[5], dilation=4,
|
142 |
+
new_level=False, BatchNorm=BatchNorm)
|
143 |
+
|
144 |
+
if arch == 'C':
|
145 |
+
self.layer7 = None if layers[6] == 0 else \
|
146 |
+
self._make_layer(BasicBlock, channels[6], layers[6], dilation=2,
|
147 |
+
new_level=False, residual=False, BatchNorm=BatchNorm)
|
148 |
+
self.layer8 = None if layers[7] == 0 else \
|
149 |
+
self._make_layer(BasicBlock, channels[7], layers[7], dilation=1,
|
150 |
+
new_level=False, residual=False, BatchNorm=BatchNorm)
|
151 |
+
elif arch == 'D':
|
152 |
+
self.layer7 = None if layers[6] == 0 else \
|
153 |
+
self._make_conv_layers(channels[6], layers[6], dilation=2, BatchNorm=BatchNorm)
|
154 |
+
self.layer8 = None if layers[7] == 0 else \
|
155 |
+
self._make_conv_layers(channels[7], layers[7], dilation=1, BatchNorm=BatchNorm)
|
156 |
+
|
157 |
+
self._init_weight()
|
158 |
+
|
159 |
+
def _init_weight(self):
|
160 |
+
for m in self.modules():
|
161 |
+
if isinstance(m, nn.Conv2d):
|
162 |
+
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
|
163 |
+
m.weight.data.normal_(0, math.sqrt(2. / n))
|
164 |
+
elif isinstance(m, SynchronizedBatchNorm2d):
|
165 |
+
m.weight.data.fill_(1)
|
166 |
+
m.bias.data.zero_()
|
167 |
+
elif isinstance(m, nn.BatchNorm2d):
|
168 |
+
m.weight.data.fill_(1)
|
169 |
+
m.bias.data.zero_()
|
170 |
+
|
171 |
+
|
172 |
+
def _make_layer(self, block, planes, blocks, stride=1, dilation=1,
|
173 |
+
new_level=True, residual=True, BatchNorm=None):
|
174 |
+
assert dilation == 1 or dilation % 2 == 0
|
175 |
+
downsample = None
|
176 |
+
if stride != 1 or self.inplanes != planes * block.expansion:
|
177 |
+
downsample = nn.Sequential(
|
178 |
+
nn.Conv2d(self.inplanes, planes * block.expansion,
|
179 |
+
kernel_size=1, stride=stride, bias=False),
|
180 |
+
BatchNorm(planes * block.expansion),
|
181 |
+
)
|
182 |
+
|
183 |
+
layers = list()
|
184 |
+
layers.append(block(
|
185 |
+
self.inplanes, planes, stride, downsample,
|
186 |
+
dilation=(1, 1) if dilation == 1 else (
|
187 |
+
dilation // 2 if new_level else dilation, dilation),
|
188 |
+
residual=residual, BatchNorm=BatchNorm))
|
189 |
+
self.inplanes = planes * block.expansion
|
190 |
+
for i in range(1, blocks):
|
191 |
+
layers.append(block(self.inplanes, planes, residual=residual,
|
192 |
+
dilation=(dilation, dilation), BatchNorm=BatchNorm))
|
193 |
+
|
194 |
+
return nn.Sequential(*layers)
|
195 |
+
|
196 |
+
def _make_conv_layers(self, channels, convs, stride=1, dilation=1, BatchNorm=None):
|
197 |
+
modules = []
|
198 |
+
for i in range(convs):
|
199 |
+
modules.extend([
|
200 |
+
nn.Conv2d(self.inplanes, channels, kernel_size=3,
|
201 |
+
stride=stride if i == 0 else 1,
|
202 |
+
padding=dilation, bias=False, dilation=dilation),
|
203 |
+
BatchNorm(channels),
|
204 |
+
nn.ReLU(inplace=True)])
|
205 |
+
self.inplanes = channels
|
206 |
+
return nn.Sequential(*modules)
|
207 |
+
|
208 |
+
def forward(self, x):
|
209 |
+
if self.arch == 'C':
|
210 |
+
x = self.conv1(x)
|
211 |
+
x = self.bn1(x)
|
212 |
+
x = self.relu(x)
|
213 |
+
elif self.arch == 'D':
|
214 |
+
x = self.layer0(x)
|
215 |
+
|
216 |
+
x = self.layer1(x)
|
217 |
+
x = self.layer2(x)
|
218 |
+
|
219 |
+
x = self.layer3(x)
|
220 |
+
low_level_feat = x
|
221 |
+
|
222 |
+
x = self.layer4(x)
|
223 |
+
x = self.layer5(x)
|
224 |
+
|
225 |
+
if self.layer6 is not None:
|
226 |
+
x = self.layer6(x)
|
227 |
+
|
228 |
+
if self.layer7 is not None:
|
229 |
+
x = self.layer7(x)
|
230 |
+
|
231 |
+
if self.layer8 is not None:
|
232 |
+
x = self.layer8(x)
|
233 |
+
|
234 |
+
return x, low_level_feat
|
235 |
+
|
236 |
+
|
237 |
+
class DRN_A(nn.Module):
|
238 |
+
|
239 |
+
def __init__(self, block, layers, BatchNorm=None):
|
240 |
+
self.inplanes = 64
|
241 |
+
super(DRN_A, self).__init__()
|
242 |
+
self.out_dim = 512 * block.expansion
|
243 |
+
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
|
244 |
+
bias=False)
|
245 |
+
self.bn1 = BatchNorm(64)
|
246 |
+
self.relu = nn.ReLU(inplace=True)
|
247 |
+
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
248 |
+
self.layer1 = self._make_layer(block, 64, layers[0], BatchNorm=BatchNorm)
|
249 |
+
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, BatchNorm=BatchNorm)
|
250 |
+
self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
|
251 |
+
dilation=2, BatchNorm=BatchNorm)
|
252 |
+
self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
|
253 |
+
dilation=4, BatchNorm=BatchNorm)
|
254 |
+
|
255 |
+
self._init_weight()
|
256 |
+
|
257 |
+
def _init_weight(self):
|
258 |
+
for m in self.modules():
|
259 |
+
if isinstance(m, nn.Conv2d):
|
260 |
+
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
|
261 |
+
m.weight.data.normal_(0, math.sqrt(2. / n))
|
262 |
+
elif isinstance(m, SynchronizedBatchNorm2d):
|
263 |
+
m.weight.data.fill_(1)
|
264 |
+
m.bias.data.zero_()
|
265 |
+
elif isinstance(m, nn.BatchNorm2d):
|
266 |
+
m.weight.data.fill_(1)
|
267 |
+
m.bias.data.zero_()
|
268 |
+
|
269 |
+
def _make_layer(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None):
|
270 |
+
downsample = None
|
271 |
+
if stride != 1 or self.inplanes != planes * block.expansion:
|
272 |
+
downsample = nn.Sequential(
|
273 |
+
nn.Conv2d(self.inplanes, planes * block.expansion,
|
274 |
+
kernel_size=1, stride=stride, bias=False),
|
275 |
+
BatchNorm(planes * block.expansion),
|
276 |
+
)
|
277 |
+
|
278 |
+
layers = []
|
279 |
+
layers.append(block(self.inplanes, planes, stride, downsample, BatchNorm=BatchNorm))
|
280 |
+
self.inplanes = planes * block.expansion
|
281 |
+
for i in range(1, blocks):
|
282 |
+
layers.append(block(self.inplanes, planes,
|
283 |
+
dilation=(dilation, dilation, ), BatchNorm=BatchNorm))
|
284 |
+
|
285 |
+
return nn.Sequential(*layers)
|
286 |
+
|
287 |
+
def forward(self, x):
|
288 |
+
x = self.conv1(x)
|
289 |
+
x = self.bn1(x)
|
290 |
+
x = self.relu(x)
|
291 |
+
x = self.maxpool(x)
|
292 |
+
|
293 |
+
x = self.layer1(x)
|
294 |
+
x = self.layer2(x)
|
295 |
+
x = self.layer3(x)
|
296 |
+
x = self.layer4(x)
|
297 |
+
|
298 |
+
return x
|
299 |
+
|
300 |
+
def drn_a_50(BatchNorm, pretrained=True):
|
301 |
+
model = DRN_A(Bottleneck, [3, 4, 6, 3], BatchNorm=BatchNorm)
|
302 |
+
if pretrained:
|
303 |
+
model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
|
304 |
+
return model
|
305 |
+
|
306 |
+
|
307 |
+
def drn_c_26(BatchNorm, pretrained=True):
|
308 |
+
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', BatchNorm=BatchNorm)
|
309 |
+
if pretrained:
|
310 |
+
pretrained = model_zoo.load_url(model_urls['drn-c-26'])
|
311 |
+
del pretrained['fc.weight']
|
312 |
+
del pretrained['fc.bias']
|
313 |
+
model.load_state_dict(pretrained)
|
314 |
+
return model
|
315 |
+
|
316 |
+
|
317 |
+
def drn_c_42(BatchNorm, pretrained=True):
|
318 |
+
model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', BatchNorm=BatchNorm)
|
319 |
+
if pretrained:
|
320 |
+
pretrained = model_zoo.load_url(model_urls['drn-c-42'])
|
321 |
+
del pretrained['fc.weight']
|
322 |
+
del pretrained['fc.bias']
|
323 |
+
model.load_state_dict(pretrained)
|
324 |
+
return model
|
325 |
+
|
326 |
+
|
327 |
+
def drn_c_58(BatchNorm, pretrained=True):
|
328 |
+
model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', BatchNorm=BatchNorm)
|
329 |
+
if pretrained:
|
330 |
+
pretrained = model_zoo.load_url(model_urls['drn-c-58'])
|
331 |
+
del pretrained['fc.weight']
|
332 |
+
del pretrained['fc.bias']
|
333 |
+
model.load_state_dict(pretrained)
|
334 |
+
return model
|
335 |
+
|
336 |
+
|
337 |
+
def drn_d_22(BatchNorm, pretrained=True):
|
338 |
+
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', BatchNorm=BatchNorm)
|
339 |
+
if pretrained:
|
340 |
+
pretrained = model_zoo.load_url(model_urls['drn-d-22'])
|
341 |
+
del pretrained['fc.weight']
|
342 |
+
del pretrained['fc.bias']
|
343 |
+
model.load_state_dict(pretrained)
|
344 |
+
return model
|
345 |
+
|
346 |
+
|
347 |
+
def drn_d_24(BatchNorm, pretrained=True):
|
348 |
+
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', BatchNorm=BatchNorm)
|
349 |
+
if pretrained:
|
350 |
+
pretrained = model_zoo.load_url(model_urls['drn-d-24'])
|
351 |
+
del pretrained['fc.weight']
|
352 |
+
del pretrained['fc.bias']
|
353 |
+
model.load_state_dict(pretrained)
|
354 |
+
return model
|
355 |
+
|
356 |
+
|
357 |
+
def drn_d_38(BatchNorm, pretrained=True):
|
358 |
+
model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', BatchNorm=BatchNorm)
|
359 |
+
if pretrained:
|
360 |
+
pretrained = model_zoo.load_url(model_urls['drn-d-38'])
|
361 |
+
del pretrained['fc.weight']
|
362 |
+
del pretrained['fc.bias']
|
363 |
+
model.load_state_dict(pretrained)
|
364 |
+
return model
|
365 |
+
|
366 |
+
|
367 |
+
def drn_d_40(BatchNorm, pretrained=True):
|
368 |
+
model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', BatchNorm=BatchNorm)
|
369 |
+
if pretrained:
|
370 |
+
pretrained = model_zoo.load_url(model_urls['drn-d-40'])
|
371 |
+
del pretrained['fc.weight']
|
372 |
+
del pretrained['fc.bias']
|
373 |
+
model.load_state_dict(pretrained)
|
374 |
+
return model
|
375 |
+
|
376 |
+
|
377 |
+
def drn_d_54(BatchNorm, pretrained=True):
|
378 |
+
model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', BatchNorm=BatchNorm)
|
379 |
+
if pretrained:
|
380 |
+
pretrained = model_zoo.load_url(model_urls['drn-d-54'])
|
381 |
+
del pretrained['fc.weight']
|
382 |
+
del pretrained['fc.bias']
|
383 |
+
model.load_state_dict(pretrained)
|
384 |
+
return model
|
385 |
+
|
386 |
+
|
387 |
+
def drn_d_105(BatchNorm, pretrained=True):
|
388 |
+
model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', BatchNorm=BatchNorm)
|
389 |
+
if pretrained:
|
390 |
+
pretrained = model_zoo.load_url(model_urls['drn-d-105'])
|
391 |
+
del pretrained['fc.weight']
|
392 |
+
del pretrained['fc.bias']
|
393 |
+
model.load_state_dict(pretrained)
|
394 |
+
return model
|
395 |
+
|
396 |
+
if __name__ == "__main__":
|
397 |
+
import torch
|
398 |
+
model = drn_a_50(BatchNorm=nn.BatchNorm2d, pretrained=True)
|
399 |
+
input = torch.rand(1, 3, 512, 512)
|
400 |
+
output, low_level_feat = model(input)
|
401 |
+
print(output.size())
|
402 |
+
print(low_level_feat.size())
|
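The `drn_*` factory functions above differ only in the residual block type, the per-level depths, and the architecture variant ('C' vs 'D'). A minimal usage sketch, assuming the repo's `model.…` import root (as used by the other backbone files) and `pretrained=False` to avoid the weight download; the two returned tensors follow the `return x, low_level_feat` of the DRN forward pass above:

```python
import torch
import torch.nn as nn

# Sketch only: drn_d_54 builds a dilated residual network whose forward pass
# returns (features, low_level_features), as in the DRN class above.
from model.deep_lab_model.backbone.drn import drn_d_54

backbone = drn_d_54(BatchNorm=nn.BatchNorm2d, pretrained=False)  # no checkpoint download
x = torch.rand(1, 3, 512, 512)            # dummy RGB input
feat, low_level_feat = backbone(x)        # deep features + early-stage features
print(feat.size(), low_level_feat.size())
```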
data/MBD/model/deep_lab_model/backbone/mobilenet.py
ADDED
@@ -0,0 +1,151 @@
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import torch.nn as nn
|
4 |
+
import math
|
5 |
+
from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
|
6 |
+
import torch.utils.model_zoo as model_zoo
|
7 |
+
|
8 |
+
def conv_bn(inp, oup, stride, BatchNorm):
|
9 |
+
return nn.Sequential(
|
10 |
+
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
|
11 |
+
BatchNorm(oup),
|
12 |
+
nn.ReLU6(inplace=True)
|
13 |
+
)
|
14 |
+
|
15 |
+
|
16 |
+
def fixed_padding(inputs, kernel_size, dilation):
|
17 |
+
kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
|
18 |
+
pad_total = kernel_size_effective - 1
|
19 |
+
pad_beg = pad_total // 2
|
20 |
+
pad_end = pad_total - pad_beg
|
21 |
+
padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
|
22 |
+
return padded_inputs
|
23 |
+
|
24 |
+
|
25 |
+
class InvertedResidual(nn.Module):
|
26 |
+
def __init__(self, inp, oup, stride, dilation, expand_ratio, BatchNorm):
|
27 |
+
super(InvertedResidual, self).__init__()
|
28 |
+
self.stride = stride
|
29 |
+
assert stride in [1, 2]
|
30 |
+
|
31 |
+
hidden_dim = round(inp * expand_ratio)
|
32 |
+
self.use_res_connect = self.stride == 1 and inp == oup
|
33 |
+
self.kernel_size = 3
|
34 |
+
self.dilation = dilation
|
35 |
+
|
36 |
+
if expand_ratio == 1:
|
37 |
+
self.conv = nn.Sequential(
|
38 |
+
# dw
|
39 |
+
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False),
|
40 |
+
BatchNorm(hidden_dim),
|
41 |
+
nn.ReLU6(inplace=True),
|
42 |
+
# pw-linear
|
43 |
+
nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, 1, bias=False),
|
44 |
+
BatchNorm(oup),
|
45 |
+
)
|
46 |
+
else:
|
47 |
+
self.conv = nn.Sequential(
|
48 |
+
# pw
|
49 |
+
nn.Conv2d(inp, hidden_dim, 1, 1, 0, 1, bias=False),
|
50 |
+
BatchNorm(hidden_dim),
|
51 |
+
nn.ReLU6(inplace=True),
|
52 |
+
# dw
|
53 |
+
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False),
|
54 |
+
BatchNorm(hidden_dim),
|
55 |
+
nn.ReLU6(inplace=True),
|
56 |
+
# pw-linear
|
57 |
+
nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, bias=False),
|
58 |
+
BatchNorm(oup),
|
59 |
+
)
|
60 |
+
|
61 |
+
def forward(self, x):
|
62 |
+
x_pad = fixed_padding(x, self.kernel_size, dilation=self.dilation)
|
63 |
+
if self.use_res_connect:
|
64 |
+
x = x + self.conv(x_pad)
|
65 |
+
else:
|
66 |
+
x = self.conv(x_pad)
|
67 |
+
return x
|
68 |
+
|
69 |
+
|
70 |
+
class MobileNetV2(nn.Module):
|
71 |
+
def __init__(self, output_stride=8, BatchNorm=None, width_mult=1., pretrained=True):
|
72 |
+
super(MobileNetV2, self).__init__()
|
73 |
+
block = InvertedResidual
|
74 |
+
input_channel = 32
|
75 |
+
current_stride = 1
|
76 |
+
rate = 1
|
77 |
+
interverted_residual_setting = [
|
78 |
+
# t, c, n, s
|
79 |
+
[1, 16, 1, 1],
|
80 |
+
[6, 24, 2, 2],
|
81 |
+
[6, 32, 3, 2],
|
82 |
+
[6, 64, 4, 2],
|
83 |
+
[6, 96, 3, 1],
|
84 |
+
[6, 160, 3, 2],
|
85 |
+
[6, 320, 1, 1],
|
86 |
+
]
|
87 |
+
|
88 |
+
# building first layer
|
89 |
+
input_channel = int(input_channel * width_mult)
|
90 |
+
self.features = [conv_bn(3, input_channel, 2, BatchNorm)]
|
91 |
+
current_stride *= 2
|
92 |
+
# building inverted residual blocks
|
93 |
+
for t, c, n, s in interverted_residual_setting:
|
94 |
+
if current_stride == output_stride:
|
95 |
+
stride = 1
|
96 |
+
dilation = rate
|
97 |
+
rate *= s
|
98 |
+
else:
|
99 |
+
stride = s
|
100 |
+
dilation = 1
|
101 |
+
current_stride *= s
|
102 |
+
output_channel = int(c * width_mult)
|
103 |
+
for i in range(n):
|
104 |
+
if i == 0:
|
105 |
+
self.features.append(block(input_channel, output_channel, stride, dilation, t, BatchNorm))
|
106 |
+
else:
|
107 |
+
self.features.append(block(input_channel, output_channel, 1, dilation, t, BatchNorm))
|
108 |
+
input_channel = output_channel
|
109 |
+
self.features = nn.Sequential(*self.features)
|
110 |
+
self._initialize_weights()
|
111 |
+
|
112 |
+
if pretrained:
|
113 |
+
self._load_pretrained_model()
|
114 |
+
|
115 |
+
self.low_level_features = self.features[0:4]
|
116 |
+
self.high_level_features = self.features[4:]
|
117 |
+
|
118 |
+
def forward(self, x):
|
119 |
+
low_level_feat = self.low_level_features(x)
|
120 |
+
x = self.high_level_features(low_level_feat)
|
121 |
+
return x, low_level_feat
|
122 |
+
|
123 |
+
def _load_pretrained_model(self):
|
124 |
+
pretrain_dict = model_zoo.load_url('http://jeff95.me/models/mobilenet_v2-6a65762b.pth')
|
125 |
+
model_dict = {}
|
126 |
+
state_dict = self.state_dict()
|
127 |
+
for k, v in pretrain_dict.items():
|
128 |
+
if k in state_dict:
|
129 |
+
model_dict[k] = v
|
130 |
+
state_dict.update(model_dict)
|
131 |
+
self.load_state_dict(state_dict)
|
132 |
+
|
133 |
+
def _initialize_weights(self):
|
134 |
+
for m in self.modules():
|
135 |
+
if isinstance(m, nn.Conv2d):
|
136 |
+
# n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
|
137 |
+
# m.weight.data.normal_(0, math.sqrt(2. / n))
|
138 |
+
torch.nn.init.kaiming_normal_(m.weight)
|
139 |
+
elif isinstance(m, SynchronizedBatchNorm2d):
|
140 |
+
m.weight.data.fill_(1)
|
141 |
+
m.bias.data.zero_()
|
142 |
+
elif isinstance(m, nn.BatchNorm2d):
|
143 |
+
m.weight.data.fill_(1)
|
144 |
+
m.bias.data.zero_()
|
145 |
+
|
146 |
+
if __name__ == "__main__":
|
147 |
+
input = torch.rand(1, 3, 512, 512)
|
148 |
+
model = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d)
|
149 |
+
output, low_level_feat = model(input)
|
150 |
+
print(output.size())
|
151 |
+
print(low_level_feat.size())
|
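The `fixed_padding` helper pads the input so that the depthwise convolutions (built with `padding=0`) keep their spatial size even when dilated. A self-contained sketch of the same arithmetic:

```python
import torch
import torch.nn.functional as F

def fixed_padding(inputs, kernel_size, dilation):
    # effective kernel grows with dilation; pad (effective - 1), split front/back
    kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    return F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))

x = torch.rand(1, 32, 64, 64)
print(fixed_padding(x, kernel_size=3, dilation=1).shape)  # padded by 1 per side -> 66x66
print(fixed_padding(x, kernel_size=3, dilation=2).shape)  # padded by 2 per side -> 68x68
```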
data/MBD/model/deep_lab_model/backbone/resnet.py
ADDED
@@ -0,0 +1,170 @@
1 |
+
import math
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.utils.model_zoo as model_zoo
|
4 |
+
from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
|
5 |
+
|
6 |
+
class Bottleneck(nn.Module):
|
7 |
+
expansion = 4
|
8 |
+
|
9 |
+
def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, BatchNorm=None):
|
10 |
+
super(Bottleneck, self).__init__()
|
11 |
+
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
|
12 |
+
self.bn1 = BatchNorm(planes)
|
13 |
+
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
|
14 |
+
dilation=dilation, padding=dilation, bias=False)
|
15 |
+
self.bn2 = BatchNorm(planes)
|
16 |
+
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
|
17 |
+
self.bn3 = BatchNorm(planes * 4)
|
18 |
+
self.relu = nn.ReLU(inplace=True)
|
19 |
+
self.downsample = downsample
|
20 |
+
self.stride = stride
|
21 |
+
self.dilation = dilation
|
22 |
+
|
23 |
+
def forward(self, x):
|
24 |
+
residual = x
|
25 |
+
|
26 |
+
out = self.conv1(x)
|
27 |
+
out = self.bn1(out)
|
28 |
+
out = self.relu(out)
|
29 |
+
|
30 |
+
out = self.conv2(out)
|
31 |
+
out = self.bn2(out)
|
32 |
+
out = self.relu(out)
|
33 |
+
|
34 |
+
out = self.conv3(out)
|
35 |
+
out = self.bn3(out)
|
36 |
+
|
37 |
+
if self.downsample is not None:
|
38 |
+
residual = self.downsample(x)
|
39 |
+
|
40 |
+
out += residual
|
41 |
+
out = self.relu(out)
|
42 |
+
|
43 |
+
return out
|
44 |
+
|
45 |
+
class ResNet(nn.Module):
|
46 |
+
|
47 |
+
def __init__(self, block, layers, output_stride, BatchNorm, pretrained=True):
|
48 |
+
self.inplanes = 64
|
49 |
+
super(ResNet, self).__init__()
|
50 |
+
blocks = [1, 2, 4]
|
51 |
+
if output_stride == 16:
|
52 |
+
strides = [1, 2, 2, 1]
|
53 |
+
dilations = [1, 1, 1, 2]
|
54 |
+
elif output_stride == 8:
|
55 |
+
strides = [1, 2, 1, 1]
|
56 |
+
dilations = [1, 1, 2, 4]
|
57 |
+
else:
|
58 |
+
raise NotImplementedError
|
59 |
+
|
60 |
+
# Modules
|
61 |
+
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
|
62 |
+
bias=False)
|
63 |
+
self.bn1 = BatchNorm(64)
|
64 |
+
self.relu = nn.ReLU(inplace=True)
|
65 |
+
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
66 |
+
|
67 |
+
self.layer1 = self._make_layer(block, 64, layers[0], stride=strides[0], dilation=dilations[0], BatchNorm=BatchNorm)
|
68 |
+
self.layer2 = self._make_layer(block, 128, layers[1], stride=strides[1], dilation=dilations[1], BatchNorm=BatchNorm)
|
69 |
+
self.layer3 = self._make_layer(block, 256, layers[2], stride=strides[2], dilation=dilations[2], BatchNorm=BatchNorm)
|
70 |
+
self.layer4 = self._make_MG_unit(block, 512, blocks=blocks, stride=strides[3], dilation=dilations[3], BatchNorm=BatchNorm)
|
71 |
+
# self.layer4 = self._make_layer(block, 512, layers[3], stride=strides[3], dilation=dilations[3], BatchNorm=BatchNorm)
|
72 |
+
self._init_weight()
|
73 |
+
|
74 |
+
# if pretrained:
|
75 |
+
# self._load_pretrained_model()
|
76 |
+
|
77 |
+
def _make_layer(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None):
|
78 |
+
downsample = None
|
79 |
+
if stride != 1 or self.inplanes != planes * block.expansion:
|
80 |
+
downsample = nn.Sequential(
|
81 |
+
nn.Conv2d(self.inplanes, planes * block.expansion,
|
82 |
+
kernel_size=1, stride=stride, bias=False),
|
83 |
+
BatchNorm(planes * block.expansion),
|
84 |
+
)
|
85 |
+
|
86 |
+
layers = []
|
87 |
+
layers.append(block(self.inplanes, planes, stride, dilation, downsample, BatchNorm))
|
88 |
+
self.inplanes = planes * block.expansion
|
89 |
+
for i in range(1, blocks):
|
90 |
+
layers.append(block(self.inplanes, planes, dilation=dilation, BatchNorm=BatchNorm))
|
91 |
+
|
92 |
+
return nn.Sequential(*layers)
|
93 |
+
|
94 |
+
def _make_MG_unit(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None):
|
95 |
+
downsample = None
|
96 |
+
if stride != 1 or self.inplanes != planes * block.expansion:
|
97 |
+
downsample = nn.Sequential(
|
98 |
+
nn.Conv2d(self.inplanes, planes * block.expansion,
|
99 |
+
kernel_size=1, stride=stride, bias=False),
|
100 |
+
BatchNorm(planes * block.expansion),
|
101 |
+
)
|
102 |
+
|
103 |
+
layers = []
|
104 |
+
layers.append(block(self.inplanes, planes, stride, dilation=blocks[0]*dilation,
|
105 |
+
downsample=downsample, BatchNorm=BatchNorm))
|
106 |
+
self.inplanes = planes * block.expansion
|
107 |
+
for i in range(1, len(blocks)):
|
108 |
+
layers.append(block(self.inplanes, planes, stride=1,
|
109 |
+
dilation=blocks[i]*dilation, BatchNorm=BatchNorm))
|
110 |
+
|
111 |
+
return nn.Sequential(*layers)
|
112 |
+
|
113 |
+
def forward(self, input):
|
114 |
+
x = self.conv1(input)
|
115 |
+
x = self.bn1(x)
|
116 |
+
x = self.relu(x)
|
117 |
+
x = self.maxpool(x)
|
118 |
+
|
119 |
+
x = self.layer1(x)
|
120 |
+
low_level_feat = x
|
121 |
+
x = self.layer2(x)
|
122 |
+
x = self.layer3(x)
|
123 |
+
x = self.layer4(x)
|
124 |
+
return x, low_level_feat
|
125 |
+
|
126 |
+
def _init_weight(self):
|
127 |
+
for m in self.modules():
|
128 |
+
if isinstance(m, nn.Conv2d):
|
129 |
+
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
|
130 |
+
m.weight.data.normal_(0, math.sqrt(2. / n))
|
131 |
+
elif isinstance(m, SynchronizedBatchNorm2d):
|
132 |
+
m.weight.data.fill_(1)
|
133 |
+
m.bias.data.zero_()
|
134 |
+
elif isinstance(m, nn.BatchNorm2d):
|
135 |
+
m.weight.data.fill_(1)
|
136 |
+
m.bias.data.zero_()
|
137 |
+
|
138 |
+
def _load_pretrained_model(self):
|
139 |
+
|
140 |
+
import urllib.request
|
141 |
+
import ssl
|
142 |
+
ssl._create_default_https_context = ssl._create_unverified_context
|
143 |
+
response = urllib.request.urlopen('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth')
|
144 |
+
|
145 |
+
pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth')
|
146 |
+
model_dict = {}
|
147 |
+
state_dict = self.state_dict()
|
148 |
+
for k, v in pretrain_dict.items():
|
149 |
+
if k in state_dict:
|
150 |
+
# if 'conv1' in k:
|
151 |
+
# continue
|
152 |
+
model_dict[k] = v
|
153 |
+
state_dict.update(model_dict)
|
154 |
+
self.load_state_dict(state_dict)
|
155 |
+
|
156 |
+
def ResNet101(output_stride, BatchNorm, pretrained=True):
|
157 |
+
"""Constructs a ResNet-101 model.
|
158 |
+
Args:
|
159 |
+
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
160 |
+
"""
|
161 |
+
model = ResNet(Bottleneck, [3, 4, 23, 3], output_stride, BatchNorm, pretrained=pretrained)
|
162 |
+
return model
|
163 |
+
|
164 |
+
if __name__ == "__main__":
|
165 |
+
import torch
|
166 |
+
model = ResNet101(BatchNorm=nn.BatchNorm2d, pretrained=True, output_stride=8)
|
167 |
+
input = torch.rand(1, 3, 512, 512)
|
168 |
+
output, low_level_feat = model(input)
|
169 |
+
print(output.size())
|
170 |
+
print(low_level_feat.size())
|
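In `_make_MG_unit`, each block of layer4 gets a dilation equal to the per-block multiplier times the base dilation chosen for the requested output stride. A quick sketch of the resulting values, following the `blocks = [1, 2, 4]` and `dilations` set in `__init__`:

```python
# Multi-grid dilations for layer4: per-block multiplier times the base dilation.
blocks = [1, 2, 4]

for output_stride, base_dilation in [(16, 2), (8, 4)]:
    mg = [m * base_dilation for m in blocks]
    print(f"output_stride={output_stride}: layer4 dilations {mg}")
# output_stride=16: layer4 dilations [2, 4, 8]
# output_stride=8:  layer4 dilations [4, 8, 16]
```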
data/MBD/model/deep_lab_model/backbone/xception.py
ADDED
@@ -0,0 +1,288 @@
1 |
+
import math
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
import torch.nn.functional as F
|
5 |
+
import torch.utils.model_zoo as model_zoo
|
6 |
+
from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
|
7 |
+
|
8 |
+
def fixed_padding(inputs, kernel_size, dilation):
|
9 |
+
kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
|
10 |
+
pad_total = kernel_size_effective - 1
|
11 |
+
pad_beg = pad_total // 2
|
12 |
+
pad_end = pad_total - pad_beg
|
13 |
+
padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
|
14 |
+
return padded_inputs
|
15 |
+
|
16 |
+
|
17 |
+
class SeparableConv2d(nn.Module):
|
18 |
+
def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False, BatchNorm=None):
|
19 |
+
super(SeparableConv2d, self).__init__()
|
20 |
+
|
21 |
+
self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, 0, dilation,
|
22 |
+
groups=inplanes, bias=bias)
|
23 |
+
self.bn = BatchNorm(inplanes)
|
24 |
+
self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)
|
25 |
+
|
26 |
+
def forward(self, x):
|
27 |
+
x = fixed_padding(x, self.conv1.kernel_size[0], dilation=self.conv1.dilation[0])
|
28 |
+
x = self.conv1(x)
|
29 |
+
x = self.bn(x)
|
30 |
+
x = self.pointwise(x)
|
31 |
+
return x
|
32 |
+
|
33 |
+
|
34 |
+
class Block(nn.Module):
|
35 |
+
def __init__(self, inplanes, planes, reps, stride=1, dilation=1, BatchNorm=None,
|
36 |
+
start_with_relu=True, grow_first=True, is_last=False):
|
37 |
+
super(Block, self).__init__()
|
38 |
+
|
39 |
+
if planes != inplanes or stride != 1:
|
40 |
+
self.skip = nn.Conv2d(inplanes, planes, 1, stride=stride, bias=False)
|
41 |
+
self.skipbn = BatchNorm(planes)
|
42 |
+
else:
|
43 |
+
self.skip = None
|
44 |
+
|
45 |
+
self.relu = nn.ReLU(inplace=True)
|
46 |
+
rep = []
|
47 |
+
|
48 |
+
filters = inplanes
|
49 |
+
if grow_first:
|
50 |
+
rep.append(self.relu)
|
51 |
+
rep.append(SeparableConv2d(inplanes, planes, 3, 1, dilation, BatchNorm=BatchNorm))
|
52 |
+
rep.append(BatchNorm(planes))
|
53 |
+
filters = planes
|
54 |
+
|
55 |
+
for i in range(reps - 1):
|
56 |
+
rep.append(self.relu)
|
57 |
+
rep.append(SeparableConv2d(filters, filters, 3, 1, dilation, BatchNorm=BatchNorm))
|
58 |
+
rep.append(BatchNorm(filters))
|
59 |
+
|
60 |
+
if not grow_first:
|
61 |
+
rep.append(self.relu)
|
62 |
+
rep.append(SeparableConv2d(inplanes, planes, 3, 1, dilation, BatchNorm=BatchNorm))
|
63 |
+
rep.append(BatchNorm(planes))
|
64 |
+
|
65 |
+
if stride != 1:
|
66 |
+
rep.append(self.relu)
|
67 |
+
rep.append(SeparableConv2d(planes, planes, 3, 2, BatchNorm=BatchNorm))
|
68 |
+
rep.append(BatchNorm(planes))
|
69 |
+
|
70 |
+
if stride == 1 and is_last:
|
71 |
+
rep.append(self.relu)
|
72 |
+
rep.append(SeparableConv2d(planes, planes, 3, 1, BatchNorm=BatchNorm))
|
73 |
+
rep.append(BatchNorm(planes))
|
74 |
+
|
75 |
+
if not start_with_relu:
|
76 |
+
rep = rep[1:]
|
77 |
+
|
78 |
+
self.rep = nn.Sequential(*rep)
|
79 |
+
|
80 |
+
def forward(self, inp):
|
81 |
+
x = self.rep(inp)
|
82 |
+
|
83 |
+
if self.skip is not None:
|
84 |
+
skip = self.skip(inp)
|
85 |
+
skip = self.skipbn(skip)
|
86 |
+
else:
|
87 |
+
skip = inp
|
88 |
+
|
89 |
+
x = x + skip
|
90 |
+
|
91 |
+
return x
|
92 |
+
|
93 |
+
|
94 |
+
class AlignedXception(nn.Module):
|
95 |
+
"""
|
96 |
+
Modified Alighed Xception
|
97 |
+
"""
|
98 |
+
def __init__(self, output_stride, BatchNorm,
|
99 |
+
pretrained=True):
|
100 |
+
super(AlignedXception, self).__init__()
|
101 |
+
|
102 |
+
if output_stride == 16:
|
103 |
+
entry_block3_stride = 2
|
104 |
+
middle_block_dilation = 1
|
105 |
+
exit_block_dilations = (1, 2)
|
106 |
+
elif output_stride == 8:
|
107 |
+
entry_block3_stride = 1
|
108 |
+
middle_block_dilation = 2
|
109 |
+
exit_block_dilations = (2, 4)
|
110 |
+
else:
|
111 |
+
raise NotImplementedError
|
112 |
+
|
113 |
+
|
114 |
+
# Entry flow
|
115 |
+
self.conv1 = nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False)
|
116 |
+
self.bn1 = BatchNorm(32)
|
117 |
+
self.relu = nn.ReLU(inplace=True)
|
118 |
+
|
119 |
+
self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1, bias=False)
|
120 |
+
self.bn2 = BatchNorm(64)
|
121 |
+
|
122 |
+
self.block1 = Block(64, 128, reps=2, stride=2, BatchNorm=BatchNorm, start_with_relu=False)
|
123 |
+
self.block2 = Block(128, 256, reps=2, stride=2, BatchNorm=BatchNorm, start_with_relu=False,
|
124 |
+
grow_first=True)
|
125 |
+
self.block3 = Block(256, 728, reps=2, stride=entry_block3_stride, BatchNorm=BatchNorm,
|
126 |
+
start_with_relu=True, grow_first=True, is_last=True)
|
127 |
+
|
128 |
+
# Middle flow
|
129 |
+
self.block4 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
130 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
131 |
+
self.block5 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
132 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
133 |
+
self.block6 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
134 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
135 |
+
self.block7 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
136 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
137 |
+
self.block8 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
138 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
139 |
+
self.block9 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
140 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
141 |
+
self.block10 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
142 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
143 |
+
self.block11 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
144 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
145 |
+
self.block12 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
146 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
147 |
+
self.block13 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
148 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
149 |
+
self.block14 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
150 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
151 |
+
self.block15 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
152 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
153 |
+
self.block16 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
154 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
155 |
+
self.block17 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
156 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
157 |
+
self.block18 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
158 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
159 |
+
self.block19 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation,
|
160 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=True)
|
161 |
+
|
162 |
+
# Exit flow
|
163 |
+
self.block20 = Block(728, 1024, reps=2, stride=1, dilation=exit_block_dilations[0],
|
164 |
+
BatchNorm=BatchNorm, start_with_relu=True, grow_first=False, is_last=True)
|
165 |
+
|
166 |
+
self.conv3 = SeparableConv2d(1024, 1536, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm)
|
167 |
+
self.bn3 = BatchNorm(1536)
|
168 |
+
|
169 |
+
self.conv4 = SeparableConv2d(1536, 1536, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm)
|
170 |
+
self.bn4 = BatchNorm(1536)
|
171 |
+
|
172 |
+
self.conv5 = SeparableConv2d(1536, 2048, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm)
|
173 |
+
self.bn5 = BatchNorm(2048)
|
174 |
+
|
175 |
+
# Init weights
|
176 |
+
self._init_weight()
|
177 |
+
|
178 |
+
# Load pretrained model
|
179 |
+
if pretrained:
|
180 |
+
self._load_pretrained_model()
|
181 |
+
|
182 |
+
def forward(self, x):
|
183 |
+
# Entry flow
|
184 |
+
x = self.conv1(x)
|
185 |
+
x = self.bn1(x)
|
186 |
+
x = self.relu(x)
|
187 |
+
|
188 |
+
x = self.conv2(x)
|
189 |
+
x = self.bn2(x)
|
190 |
+
x = self.relu(x)
|
191 |
+
|
192 |
+
x = self.block1(x)
|
193 |
+
# add relu here
|
194 |
+
x = self.relu(x)
|
195 |
+
low_level_feat = x
|
196 |
+
x = self.block2(x)
|
197 |
+
x = self.block3(x)
|
198 |
+
|
199 |
+
# Middle flow
|
200 |
+
x = self.block4(x)
|
201 |
+
x = self.block5(x)
|
202 |
+
x = self.block6(x)
|
203 |
+
x = self.block7(x)
|
204 |
+
x = self.block8(x)
|
205 |
+
x = self.block9(x)
|
206 |
+
x = self.block10(x)
|
207 |
+
x = self.block11(x)
|
208 |
+
x = self.block12(x)
|
209 |
+
x = self.block13(x)
|
210 |
+
x = self.block14(x)
|
211 |
+
x = self.block15(x)
|
212 |
+
x = self.block16(x)
|
213 |
+
x = self.block17(x)
|
214 |
+
x = self.block18(x)
|
215 |
+
x = self.block19(x)
|
216 |
+
|
217 |
+
# Exit flow
|
218 |
+
x = self.block20(x)
|
219 |
+
x = self.relu(x)
|
220 |
+
x = self.conv3(x)
|
221 |
+
x = self.bn3(x)
|
222 |
+
x = self.relu(x)
|
223 |
+
|
224 |
+
x = self.conv4(x)
|
225 |
+
x = self.bn4(x)
|
226 |
+
x = self.relu(x)
|
227 |
+
|
228 |
+
x = self.conv5(x)
|
229 |
+
x = self.bn5(x)
|
230 |
+
x = self.relu(x)
|
231 |
+
|
232 |
+
return x, low_level_feat
|
233 |
+
|
234 |
+
def _init_weight(self):
|
235 |
+
for m in self.modules():
|
236 |
+
if isinstance(m, nn.Conv2d):
|
237 |
+
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
|
238 |
+
m.weight.data.normal_(0, math.sqrt(2. / n))
|
239 |
+
elif isinstance(m, SynchronizedBatchNorm2d):
|
240 |
+
m.weight.data.fill_(1)
|
241 |
+
m.bias.data.zero_()
|
242 |
+
elif isinstance(m, nn.BatchNorm2d):
|
243 |
+
m.weight.data.fill_(1)
|
244 |
+
m.bias.data.zero_()
|
245 |
+
|
246 |
+
|
247 |
+
def _load_pretrained_model(self):
|
248 |
+
pretrain_dict = model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth')
|
249 |
+
model_dict = {}
|
250 |
+
state_dict = self.state_dict()
|
251 |
+
|
252 |
+
for k, v in pretrain_dict.items():
|
253 |
+
if k in state_dict:
|
254 |
+
if 'pointwise' in k:
|
255 |
+
v = v.unsqueeze(-1).unsqueeze(-1)
|
256 |
+
if k.startswith('block11'):
|
257 |
+
model_dict[k] = v
|
258 |
+
model_dict[k.replace('block11', 'block12')] = v
|
259 |
+
model_dict[k.replace('block11', 'block13')] = v
|
260 |
+
model_dict[k.replace('block11', 'block14')] = v
|
261 |
+
model_dict[k.replace('block11', 'block15')] = v
|
262 |
+
model_dict[k.replace('block11', 'block16')] = v
|
263 |
+
model_dict[k.replace('block11', 'block17')] = v
|
264 |
+
model_dict[k.replace('block11', 'block18')] = v
|
265 |
+
model_dict[k.replace('block11', 'block19')] = v
|
266 |
+
elif k.startswith('block12'):
|
267 |
+
model_dict[k.replace('block12', 'block20')] = v
|
268 |
+
elif k.startswith('bn3'):
|
269 |
+
model_dict[k] = v
|
270 |
+
model_dict[k.replace('bn3', 'bn4')] = v
|
271 |
+
elif k.startswith('conv4'):
|
272 |
+
model_dict[k.replace('conv4', 'conv5')] = v
|
273 |
+
elif k.startswith('bn4'):
|
274 |
+
model_dict[k.replace('bn4', 'bn5')] = v
|
275 |
+
else:
|
276 |
+
model_dict[k] = v
|
277 |
+
state_dict.update(model_dict)
|
278 |
+
self.load_state_dict(state_dict)
|
279 |
+
|
280 |
+
|
281 |
+
|
282 |
+
if __name__ == "__main__":
|
283 |
+
import torch
|
284 |
+
model = AlignedXception(BatchNorm=nn.BatchNorm2d, pretrained=True, output_stride=16)
|
285 |
+
input = torch.rand(1, 3, 512, 512)
|
286 |
+
output, low_level_feat = model(input)
|
287 |
+
print(output.size())
|
288 |
+
print(low_level_feat.size())
|
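In `_load_pretrained_model`, the pointwise weights in the downloaded Xception checkpoint appear to be stored without the trailing 1x1 spatial dimensions, so they are unsqueezed before being copied into the 1x1 `pointwise` convolutions of `SeparableConv2d`. A minimal sketch of that reshape:

```python
import torch

# Checkpoint-style pointwise weight: [out_channels, in_channels] (assumed shape);
# a 1x1 nn.Conv2d expects [out_channels, in_channels, 1, 1], hence the two unsqueezes.
v = torch.rand(128, 64)
v = v.unsqueeze(-1).unsqueeze(-1)
print(v.shape)  # torch.Size([128, 64, 1, 1])
```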
data/MBD/model/deep_lab_model/decoder.py
ADDED
@@ -0,0 +1,59 @@
```python
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d

class Decoder(nn.Module):
    def __init__(self, num_classes, backbone, BatchNorm):
        super(Decoder, self).__init__()
        if backbone == 'resnet' or backbone == 'drn':
            low_level_inplanes = 256
        elif backbone == 'xception':
            low_level_inplanes = 128
        elif backbone == 'mobilenet':
            low_level_inplanes = 24
        else:
            raise NotImplementedError

        self.conv1 = nn.Conv2d(low_level_inplanes, 48, 1, bias=False)
        self.bn1 = BatchNorm(48)
        self.relu = nn.ReLU()
        self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       BatchNorm(256),
                                       nn.ReLU(),
                                       nn.Dropout(0.5),
                                       nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       BatchNorm(256),
                                       nn.ReLU(),
                                       nn.Dropout(0.1),
                                       nn.Conv2d(256, num_classes, kernel_size=1, stride=1),
                                       nn.Sigmoid()
                                       )
        self._init_weight()


    def forward(self, x, low_level_feat):
        low_level_feat = self.conv1(low_level_feat)
        low_level_feat = self.bn1(low_level_feat)
        low_level_feat = self.relu(low_level_feat)

        x = F.interpolate(x, size=low_level_feat.size()[2:], mode='bilinear', align_corners=True)
        x = torch.cat((x, low_level_feat), dim=1)
        x = self.last_conv(x)

        return x

    def _init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, SynchronizedBatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

def build_decoder(num_classes, backbone, BatchNorm):
    return Decoder(num_classes, backbone, BatchNorm)
```
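The hard-coded 304 in `last_conv` is the 256-channel ASPP output concatenated with the 48 channels produced by the 1x1 reduction of the low-level features. A quick sketch of that bookkeeping with made-up tensor sizes:

```python
import torch
import torch.nn.functional as F

aspp_out = torch.rand(1, 256, 32, 32)     # ASPP output (256 channels)
low_level = torch.rand(1, 48, 128, 128)   # low-level features after the 1x1 conv -> 48 channels

x = F.interpolate(aspp_out, size=low_level.shape[2:], mode='bilinear', align_corners=True)
x = torch.cat((x, low_level), dim=1)
print(x.shape)                            # torch.Size([1, 304, 128, 128]) -> 256 + 48
```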
data/MBD/model/deep_lab_model/deeplab.py
ADDED
@@ -0,0 +1,81 @@
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.deep_lab_model.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
from model.deep_lab_model.aspp import build_aspp
from model.deep_lab_model.decoder import build_decoder
from model.deep_lab_model.backbone import build_backbone

class DeepLab(nn.Module):
    def __init__(self, backbone='resnet', output_stride=16, num_classes=21,
                 sync_bn=True, freeze_bn=False):
        super(DeepLab, self).__init__()
        if backbone == 'drn':
            output_stride = 8

        if sync_bn == True:
            BatchNorm = SynchronizedBatchNorm2d
        else:
            BatchNorm = nn.BatchNorm2d

        self.backbone = build_backbone(backbone, output_stride, BatchNorm)
        self.aspp = build_aspp(backbone, output_stride, BatchNorm)
        self.decoder = build_decoder(num_classes, backbone, BatchNorm)

        self.freeze_bn = freeze_bn

    def forward(self, input):
        x, low_level_feat = self.backbone(input)
        x = self.aspp(x)
        x = self.decoder(x, low_level_feat)
        x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)

        return x

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, SynchronizedBatchNorm2d):
                m.eval()
            elif isinstance(m, nn.BatchNorm2d):
                m.eval()

    def get_1x_lr_params(self):
        modules = [self.backbone]
        for i in range(len(modules)):
            for m in modules[i].named_modules():
                if self.freeze_bn:
                    if isinstance(m[1], nn.Conv2d):
                        for p in m[1].parameters():
                            if p.requires_grad:
                                yield p
                else:
                    if isinstance(m[1], nn.Conv2d) or isinstance(m[1], SynchronizedBatchNorm2d) \
                            or isinstance(m[1], nn.BatchNorm2d):
                        for p in m[1].parameters():
                            if p.requires_grad:
                                yield p

    def get_10x_lr_params(self):
        modules = [self.aspp, self.decoder]
        for i in range(len(modules)):
            for m in modules[i].named_modules():
                if self.freeze_bn:
                    if isinstance(m[1], nn.Conv2d):
                        for p in m[1].parameters():
                            if p.requires_grad:
                                yield p
                else:
                    if isinstance(m[1], nn.Conv2d) or isinstance(m[1], SynchronizedBatchNorm2d) \
                            or isinstance(m[1], nn.BatchNorm2d):
                        for p in m[1].parameters():
                            if p.requires_grad:
                                yield p

if __name__ == "__main__":
    model = DeepLab(backbone='mobilenet', output_stride=16)
    model.eval()
    input = torch.rand(1, 3, 513, 513)
    output = model(input)
    print(output.size())
```
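The two generators `get_1x_lr_params` and `get_10x_lr_params` split the parameters so the backbone can train with a smaller learning rate than the ASPP head and decoder. A hypothetical training-side sketch (the learning rate, momentum, and weight decay below are assumed values, not taken from the training scripts):

```python
import torch
from model.deep_lab_model.deeplab import DeepLab

# Sketch only: 'resnet' is one of the backbone names accepted above; sync_bn=False
# keeps plain nn.BatchNorm2d for a single-GPU setup.
model = DeepLab(backbone='resnet', output_stride=16, sync_bn=False)

base_lr = 1e-3  # assumed value
optimizer = torch.optim.SGD(
    [
        {'params': model.get_1x_lr_params(), 'lr': base_lr},        # backbone
        {'params': model.get_10x_lr_params(), 'lr': base_lr * 10},  # ASPP + decoder
    ],
    momentum=0.9, weight_decay=5e-4)
```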
data/MBD/model/deep_lab_model/sync_batchnorm/__init__.py
ADDED
@@ -0,0 +1,12 @@
```python
# -*- coding: utf-8 -*-
# File   : __init__.py
# Author : Jiayuan Mao
# Email  : [email protected]
# Date   : 27/01/2018
#
# This file is part of Synchronized-BatchNorm-PyTorch.
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
# Distributed under MIT License.

from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d
from .replicate import DataParallelWithCallback, patch_replication_callback
```
data/MBD/model/deep_lab_model/sync_batchnorm/batchnorm.py
ADDED
@@ -0,0 +1,282 @@
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
# File : batchnorm.py
|
3 |
+
# Author : Jiayuan Mao
|
4 |
+
# Email : [email protected]
|
5 |
+
# Date : 27/01/2018
|
6 |
+
#
|
7 |
+
# This file is part of Synchronized-BatchNorm-PyTorch.
|
8 |
+
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
|
9 |
+
# Distributed under MIT License.
|
10 |
+
|
11 |
+
import collections
|
12 |
+
|
13 |
+
import torch
|
14 |
+
import torch.nn.functional as F
|
15 |
+
|
16 |
+
from torch.nn.modules.batchnorm import _BatchNorm
|
17 |
+
from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
|
18 |
+
|
19 |
+
from .comm import SyncMaster
|
20 |
+
|
21 |
+
__all__ = ['SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d']
|
22 |
+
|
23 |
+
|
24 |
+
def _sum_ft(tensor):
|
25 |
+
"""sum over the first and last dimention"""
|
26 |
+
return tensor.sum(dim=0).sum(dim=-1)
|
27 |
+
|
28 |
+
|
29 |
+
def _unsqueeze_ft(tensor):
|
30 |
+
"""add new dementions at the front and the tail"""
|
31 |
+
return tensor.unsqueeze(0).unsqueeze(-1)
|
32 |
+
|
33 |
+
|
34 |
+
_ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size'])
|
35 |
+
_MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std'])
|
36 |
+
|
37 |
+
|
38 |
+
class _SynchronizedBatchNorm(_BatchNorm):
|
39 |
+
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
|
40 |
+
super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine)
|
41 |
+
|
42 |
+
self._sync_master = SyncMaster(self._data_parallel_master)
|
43 |
+
|
44 |
+
self._is_parallel = False
|
45 |
+
self._parallel_id = None
|
46 |
+
self._slave_pipe = None
|
47 |
+
|
48 |
+
def forward(self, input):
|
49 |
+
# If it is not parallel computation or is in evaluation mode, use PyTorch's implementation.
|
50 |
+
if not (self._is_parallel and self.training):
|
51 |
+
return F.batch_norm(
|
52 |
+
input, self.running_mean, self.running_var, self.weight, self.bias,
|
53 |
+
self.training, self.momentum, self.eps)
|
54 |
+
|
55 |
+
# Resize the input to (B, C, -1).
|
56 |
+
input_shape = input.size()
|
57 |
+
input = input.view(input.size(0), self.num_features, -1)
|
58 |
+
|
59 |
+
# Compute the sum and square-sum.
|
60 |
+
sum_size = input.size(0) * input.size(2)
|
61 |
+
input_sum = _sum_ft(input)
|
62 |
+
input_ssum = _sum_ft(input ** 2)
|
63 |
+
|
64 |
+
# Reduce-and-broadcast the statistics.
|
65 |
+
if self._parallel_id == 0:
|
66 |
+
mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size))
|
67 |
+
else:
|
68 |
+
mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size))
|
69 |
+
|
70 |
+
# Compute the output.
|
71 |
+
if self.affine:
|
72 |
+
# MJY:: Fuse the multiplication for speed.
|
73 |
+
output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias)
|
74 |
+
else:
|
75 |
+
output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std)
|
76 |
+
|
77 |
+
# Reshape it.
|
78 |
+
return output.view(input_shape)
|
79 |
+
|
80 |
+
def __data_parallel_replicate__(self, ctx, copy_id):
|
81 |
+
self._is_parallel = True
|
82 |
+
self._parallel_id = copy_id
|
83 |
+
|
84 |
+
# parallel_id == 0 means master device.
|
85 |
+
if self._parallel_id == 0:
|
86 |
+
ctx.sync_master = self._sync_master
|
87 |
+
else:
|
88 |
+
self._slave_pipe = ctx.sync_master.register_slave(copy_id)
|
89 |
+
|
90 |
+
def _data_parallel_master(self, intermediates):
|
91 |
+
"""Reduce the sum and square-sum, compute the statistics, and broadcast it."""
|
92 |
+
|
93 |
+
# Always using same "device order" makes the ReduceAdd operation faster.
|
94 |
+
# Thanks to:: Tete Xiao (http://tetexiao.com/)
|
95 |
+
intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device())
|
96 |
+
|
97 |
+
to_reduce = [i[1][:2] for i in intermediates]
|
98 |
+
to_reduce = [j for i in to_reduce for j in i] # flatten
|
99 |
+
target_gpus = [i[1].sum.get_device() for i in intermediates]
|
100 |
+
|
101 |
+
sum_size = sum([i[1].sum_size for i in intermediates])
|
102 |
+
sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce)
|
103 |
+
mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size)
|
104 |
+
|
105 |
+
broadcasted = Broadcast.apply(target_gpus, mean, inv_std)
|
106 |
+
|
107 |
+
outputs = []
|
108 |
+
for i, rec in enumerate(intermediates):
|
109 |
+
outputs.append((rec[0], _MasterMessage(*broadcasted[i * 2:i * 2 + 2])))
|
110 |
+
|
111 |
+
return outputs
|
112 |
+
|
113 |
+
def _compute_mean_std(self, sum_, ssum, size):
|
114 |
+
"""Compute the mean and standard-deviation with sum and square-sum. This method
|
115 |
+
also maintains the moving average on the master device."""
|
116 |
+
assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.'
|
117 |
+
mean = sum_ / size
|
118 |
+
sumvar = ssum - sum_ * mean
|
119 |
+
unbias_var = sumvar / (size - 1)
|
120 |
+
bias_var = sumvar / size
|
121 |
+
|
122 |
+
self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data
|
123 |
+
self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data
|
124 |
+
|
125 |
+
return mean, bias_var.clamp(self.eps) ** -0.5
|
126 |
+
|
127 |
+
|
128 |
+
class SynchronizedBatchNorm1d(_SynchronizedBatchNorm):
|
129 |
+
r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a
|
130 |
+
mini-batch.
|
131 |
+
.. math::
|
132 |
+
y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
|
133 |
+
This module differs from the built-in PyTorch BatchNorm1d as the mean and
|
134 |
+
standard-deviation are reduced across all devices during training.
|
135 |
+
For example, when one uses `nn.DataParallel` to wrap the network during
|
136 |
+
training, PyTorch's implementation normalize the tensor on each device using
|
137 |
+
the statistics only on that device, which accelerated the computation and
|
138 |
+
is also easy to implement, but the statistics might be inaccurate.
|
139 |
+
Instead, in this synchronized version, the statistics will be computed
|
140 |
+
over all training samples distributed on multiple devices.
|
141 |
+
|
142 |
+
Note that, for one-GPU or CPU-only case, this module behaves exactly same
|
143 |
+
as the built-in PyTorch implementation.
|
144 |
+
The mean and standard-deviation are calculated per-dimension over
|
145 |
+
the mini-batches and gamma and beta are learnable parameter vectors
|
146 |
+
of size C (where C is the input size).
|
147 |
+
During training, this layer keeps a running estimate of its computed mean
|
148 |
+
and variance. The running sum is kept with a default momentum of 0.1.
|
149 |
+
During evaluation, this running mean/variance is used for normalization.
|
150 |
+
Because the BatchNorm is done over the `C` dimension, computing statistics
|
151 |
+
on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm
|
152 |
+
Args:
|
153 |
+
num_features: num_features from an expected input of size
|
154 |
+
`batch_size x num_features [x width]`
|
155 |
+
eps: a value added to the denominator for numerical stability.
|
156 |
+
Default: 1e-5
|
157 |
+
momentum: the value used for the running_mean and running_var
|
158 |
+
computation. Default: 0.1
|
159 |
+
affine: a boolean value that when set to ``True``, gives the layer learnable
|
160 |
+
affine parameters. Default: ``True``
|
161 |
+
Shape:
|
162 |
+
- Input: :math:`(N, C)` or :math:`(N, C, L)`
|
163 |
+
- Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)
|
164 |
+
Examples:
|
165 |
+
>>> # With Learnable Parameters
|
166 |
+
>>> m = SynchronizedBatchNorm1d(100)
|
167 |
+
>>> # Without Learnable Parameters
|
168 |
+
>>> m = SynchronizedBatchNorm1d(100, affine=False)
|
169 |
+
>>> input = torch.autograd.Variable(torch.randn(20, 100))
|
170 |
+
>>> output = m(input)
|
171 |
+
"""
|
172 |
+
|
173 |
+
def _check_input_dim(self, input):
|
174 |
+
if input.dim() != 2 and input.dim() != 3:
|
175 |
+
raise ValueError('expected 2D or 3D input (got {}D input)'
|
176 |
+
.format(input.dim()))
|
177 |
+
super(SynchronizedBatchNorm1d, self)._check_input_dim(input)
|
178 |
+
|
179 |
+
|
180 |
+
class SynchronizedBatchNorm2d(_SynchronizedBatchNorm):
|
181 |
+
r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch
|
182 |
+
of 3d inputs
|
183 |
+
.. math::
|
184 |
+
y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
|
185 |
+
This module differs from the built-in PyTorch BatchNorm2d as the mean and
|
186 |
+
standard-deviation are reduced across all devices during training.
|
187 |
+
For example, when one uses `nn.DataParallel` to wrap the network during
|
188 |
+
training, PyTorch's implementation normalize the tensor on each device using
|
189 |
+
the statistics only on that device, which accelerated the computation and
|
190 |
+
is also easy to implement, but the statistics might be inaccurate.
|
191 |
+
Instead, in this synchronized version, the statistics will be computed
|
192 |
+
over all training samples distributed on multiple devices.
|
193 |
+
|
194 |
+
Note that, for one-GPU or CPU-only case, this module behaves exactly same
|
195 |
+
as the built-in PyTorch implementation.
|
196 |
+
The mean and standard-deviation are calculated per-dimension over
|
197 |
+
the mini-batches and gamma and beta are learnable parameter vectors
|
198 |
+
of size C (where C is the input size).
|
199 |
+
During training, this layer keeps a running estimate of its computed mean
|
200 |
+
and variance. The running sum is kept with a default momentum of 0.1.
|
201 |
+
During evaluation, this running mean/variance is used for normalization.
|
202 |
+
Because the BatchNorm is done over the `C` dimension, computing statistics
|
203 |
+
on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm
|
204 |
+
Args:
|
205 |
+
num_features: num_features from an expected input of
|
206 |
+
size batch_size x num_features x height x width
|
207 |
+
eps: a value added to the denominator for numerical stability.
|
208 |
+
Default: 1e-5
|
209 |
+
momentum: the value used for the running_mean and running_var
|
210 |
+
computation. Default: 0.1
|
211 |
+
affine: a boolean value that when set to ``True``, gives the layer learnable
|
212 |
+
affine parameters. Default: ``True``
|
213 |
+
Shape:
|
214 |
+
- Input: :math:`(N, C, H, W)`
|
215 |
+
- Output: :math:`(N, C, H, W)` (same shape as input)
|
216 |
+
Examples:
|
217 |
+
>>> # With Learnable Parameters
|
218 |
+
>>> m = SynchronizedBatchNorm2d(100)
|
219 |
+
>>> # Without Learnable Parameters
|
220 |
+
>>> m = SynchronizedBatchNorm2d(100, affine=False)
|
221 |
+
>>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45))
|
222 |
+
>>> output = m(input)
|
223 |
+
"""
|
224 |
+
|
225 |
+
def _check_input_dim(self, input):
|
226 |
+
if input.dim() != 4:
|
227 |
+
raise ValueError('expected 4D input (got {}D input)'
|
228 |
+
.format(input.dim()))
|
229 |
+
super(SynchronizedBatchNorm2d, self)._check_input_dim(input)
|
230 |
+
|
231 |
+
|
232 |
+
class SynchronizedBatchNorm3d(_SynchronizedBatchNorm):
|
233 |
+
r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch
|
234 |
+
of 4d inputs
|
235 |
+
.. math::
|
236 |
+
y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
|
237 |
+
This module differs from the built-in PyTorch BatchNorm3d as the mean and
|
238 |
+
standard-deviation are reduced across all devices during training.
|
239 |
+
For example, when one uses `nn.DataParallel` to wrap the network during
|
240 |
+
training, PyTorch's implementation normalize the tensor on each device using
|
241 |
+
the statistics only on that device, which accelerated the computation and
|
242 |
+
is also easy to implement, but the statistics might be inaccurate.
|
243 |
+
Instead, in this synchronized version, the statistics will be computed
|
244 |
+
over all training samples distributed on multiple devices.
|
245 |
+
|
246 |
+
Note that, for one-GPU or CPU-only case, this module behaves exactly same
|
247 |
+
as the built-in PyTorch implementation.
|
248 |
+
The mean and standard-deviation are calculated per-dimension over
|
249 |
+
the mini-batches and gamma and beta are learnable parameter vectors
|
250 |
+
of size C (where C is the input size).
|
251 |
+
During training, this layer keeps a running estimate of its computed mean
|
252 |
+
and variance. The running sum is kept with a default momentum of 0.1.
|
253 |
+
During evaluation, this running mean/variance is used for normalization.
|
254 |
+
Because the BatchNorm is done over the `C` dimension, computing statistics
|
255 |
+
on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm
|
256 |
+
or Spatio-temporal BatchNorm
|
257 |
+
Args:
|
258 |
+
num_features: num_features from an expected input of
|
259 |
+
size batch_size x num_features x depth x height x width
|
260 |
+
eps: a value added to the denominator for numerical stability.
|
261 |
+
Default: 1e-5
|
262 |
+
momentum: the value used for the running_mean and running_var
|
263 |
+
computation. Default: 0.1
|
264 |
+
affine: a boolean value that when set to ``True``, gives the layer learnable
|
265 |
+
affine parameters. Default: ``True``
|
266 |
+
Shape:
|
267 |
+
- Input: :math:`(N, C, D, H, W)`
|
268 |
+
- Output: :math:`(N, C, D, H, W)` (same shape as input)
|
269 |
+
Examples:
|
270 |
+
>>> # With Learnable Parameters
|
271 |
+
>>> m = SynchronizedBatchNorm3d(100)
|
272 |
+
>>> # Without Learnable Parameters
|
273 |
+
>>> m = SynchronizedBatchNorm3d(100, affine=False)
|
274 |
+
>>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10))
|
275 |
+
>>> output = m(input)
|
276 |
+
"""
|
277 |
+
|
278 |
+
def _check_input_dim(self, input):
|
279 |
+
if input.dim() != 5:
|
280 |
+
raise ValueError('expected 5D input (got {}D input)'
|
281 |
+
.format(input.dim()))
|
282 |
+
super(SynchronizedBatchNorm3d, self)._check_input_dim(input)
|
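A self-contained sketch of the statistics math in `_compute_mean_std`: each replica sends its sum, sum of squares, and element count, and the master turns them into a mean and `inv_std = 1/sqrt(var + eps)` (the real module additionally updates the running mean/variance on the master device):

```python
import torch

eps = 1e-5
x = torch.rand(4, 3, 8, 8)                      # pretend batch gathered from all devices
flat = x.transpose(0, 1).contiguous().view(3, -1)

size = flat.size(1)
sum_ = flat.sum(dim=1)
ssum = (flat ** 2).sum(dim=1)

mean = sum_ / size
sumvar = ssum - sum_ * mean
bias_var = sumvar / size                        # biased variance, used for normalization
inv_std = bias_var.clamp(eps) ** -0.5

print(torch.allclose(mean, flat.mean(dim=1)))
print(torch.allclose(inv_std,
                     1 / flat.var(dim=1, unbiased=False).clamp(min=eps).sqrt(),
                     atol=1e-5))
```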
data/MBD/model/deep_lab_model/sync_batchnorm/comm.py
ADDED
@@ -0,0 +1,129 @@
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
# File : comm.py
|
3 |
+
# Author : Jiayuan Mao
|
4 |
+
# Email : [email protected]
|
5 |
+
# Date : 27/01/2018
|
6 |
+
#
|
7 |
+
# This file is part of Synchronized-BatchNorm-PyTorch.
|
8 |
+
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
|
9 |
+
# Distributed under MIT License.
|
10 |
+
|
11 |
+
import queue
|
12 |
+
import collections
|
13 |
+
import threading
|
14 |
+
|
15 |
+
__all__ = ['FutureResult', 'SlavePipe', 'SyncMaster']
|
16 |
+
|
17 |
+
|
18 |
+
class FutureResult(object):
|
19 |
+
"""A thread-safe future implementation. Used only as one-to-one pipe."""
|
20 |
+
|
21 |
+
def __init__(self):
|
22 |
+
self._result = None
|
23 |
+
self._lock = threading.Lock()
|
24 |
+
self._cond = threading.Condition(self._lock)
|
25 |
+
|
26 |
+
def put(self, result):
|
27 |
+
with self._lock:
|
28 |
+
assert self._result is None, 'Previous result has\'t been fetched.'
|
29 |
+
self._result = result
|
30 |
+
self._cond.notify()
|
31 |
+
|
32 |
+
def get(self):
|
33 |
+
with self._lock:
|
34 |
+
if self._result is None:
|
35 |
+
self._cond.wait()
|
36 |
+
|
37 |
+
res = self._result
|
38 |
+
self._result = None
|
39 |
+
return res
|
40 |
+
|
41 |
+
|
42 |
+
_MasterRegistry = collections.namedtuple('MasterRegistry', ['result'])
|
43 |
+
_SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result'])
|
44 |
+
|
45 |
+
|
46 |
+
class SlavePipe(_SlavePipeBase):
|
47 |
+
"""Pipe for master-slave communication."""
|
48 |
+
|
49 |
+
def run_slave(self, msg):
|
50 |
+
self.queue.put((self.identifier, msg))
|
51 |
+
ret = self.result.get()
|
52 |
+
self.queue.put(True)
|
53 |
+
return ret
|
54 |
+
|
55 |
+
|
56 |
+
class SyncMaster(object):
|
57 |
+
"""An abstract `SyncMaster` object.
|
58 |
+
- During the replication, as the data parallel will trigger an callback of each module, all slave devices should
|
59 |
+
call `register(id)` and obtain an `SlavePipe` to communicate with the master.
|
60 |
+
- During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected,
|
61 |
+
and passed to a registered callback.
|
62 |
+
- After receiving the messages, the master device should gather the information and determine to message passed
|
63 |
+
back to each slave devices.
|
64 |
+
"""
|
65 |
+
|
66 |
+
def __init__(self, master_callback):
|
67 |
+
"""
|
68 |
+
Args:
|
69 |
+
master_callback: a callback to be invoked after having collected messages from slave devices.
|
70 |
+
"""
|
71 |
+
self._master_callback = master_callback
|
72 |
+
self._queue = queue.Queue()
|
73 |
+
self._registry = collections.OrderedDict()
|
74 |
+
self._activated = False
|
75 |
+
|
76 |
+
def __getstate__(self):
|
77 |
+
return {'master_callback': self._master_callback}
|
78 |
+
|
79 |
+
def __setstate__(self, state):
|
80 |
+
self.__init__(state['master_callback'])
|
81 |
+
|
82 |
+
def register_slave(self, identifier):
|
83 |
+
"""
|
84 |
+
Register an slave device.
|
85 |
+
Args:
|
86 |
+
identifier: an identifier, usually is the device id.
|
87 |
+
Returns: a `SlavePipe` object which can be used to communicate with the master device.
|
88 |
+
"""
|
89 |
+
if self._activated:
|
90 |
+
assert self._queue.empty(), 'Queue is not clean before next initialization.'
|
91 |
+
self._activated = False
|
92 |
+
self._registry.clear()
|
93 |
+
future = FutureResult()
|
94 |
+
self._registry[identifier] = _MasterRegistry(future)
|
95 |
+
return SlavePipe(identifier, self._queue, future)
|
96 |
+
|
97 |
+
def run_master(self, master_msg):
|
98 |
+
"""
|
99 |
+
Main entry for the master device in each forward pass.
|
100 |
+
The messages were first collected from each devices (including the master device), and then
|
101 |
+
an callback will be invoked to compute the message to be sent back to each devices
|
102 |
+
(including the master device).
|
103 |
+
Args:
|
104 |
+
master_msg: the message that the master want to send to itself. This will be placed as the first
|
105 |
+
message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example.
|
106 |
+
Returns: the message to be sent back to the master device.
|
107 |
+
"""
|
108 |
+
self._activated = True
|
109 |
+
|
110 |
+
intermediates = [(0, master_msg)]
|
111 |
+
for i in range(self.nr_slaves):
|
112 |
+
intermediates.append(self._queue.get())
|
113 |
+
|
114 |
+
results = self._master_callback(intermediates)
|
115 |
+
assert results[0][0] == 0, 'The first result should belongs to the master.'
|
116 |
+
|
117 |
+
for i, res in results:
|
118 |
+
if i == 0:
|
119 |
+
continue
|
120 |
+
self._registry[i].result.put(res)
|
121 |
+
|
122 |
+
for i in range(self.nr_slaves):
|
123 |
+
assert self._queue.get() is True
|
124 |
+
|
125 |
+
return results[0][1]
|
126 |
+
|
127 |
+
@property
|
128 |
+
def nr_slaves(self):
|
129 |
+
return len(self._registry)
|
data/MBD/model/deep_lab_model/sync_batchnorm/replicate.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
# File : replicate.py
|
3 |
+
# Author : Jiayuan Mao
|
4 |
+
# Email : [email protected]
|
5 |
+
# Date : 27/01/2018
|
6 |
+
#
|
7 |
+
# This file is part of Synchronized-BatchNorm-PyTorch.
|
8 |
+
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
|
9 |
+
# Distributed under MIT License.
|
10 |
+
|
11 |
+
import functools
|
12 |
+
|
13 |
+
from torch.nn.parallel.data_parallel import DataParallel
|
14 |
+
|
15 |
+
__all__ = [
|
16 |
+
'CallbackContext',
|
17 |
+
'execute_replication_callbacks',
|
18 |
+
'DataParallelWithCallback',
|
19 |
+
'patch_replication_callback'
|
20 |
+
]
|
21 |
+
|
22 |
+
|
23 |
+
class CallbackContext(object):
|
24 |
+
pass
|
25 |
+
|
26 |
+
|
27 |
+
def execute_replication_callbacks(modules):
|
28 |
+
"""
|
29 |
+
Execute an replication callback `__data_parallel_replicate__` on each module created by original replication.
|
30 |
+
The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
|
31 |
+
Note that, as all modules are isomorphism, we assign each sub-module with a context
|
32 |
+
(shared among multiple copies of this module on different devices).
|
33 |
+
Through this context, different copies can share some information.
|
34 |
+
We guarantee that the callback on the master copy (the first copy) will be called ahead of calling the callback
|
35 |
+
of any slave copies.
|
36 |
+
"""
|
37 |
+
master_copy = modules[0]
|
38 |
+
nr_modules = len(list(master_copy.modules()))
|
39 |
+
ctxs = [CallbackContext() for _ in range(nr_modules)]
|
40 |
+
|
41 |
+
for i, module in enumerate(modules):
|
42 |
+
for j, m in enumerate(module.modules()):
|
43 |
+
if hasattr(m, '__data_parallel_replicate__'):
|
44 |
+
m.__data_parallel_replicate__(ctxs[j], i)
|
45 |
+
|
46 |
+
|
47 |
+
class DataParallelWithCallback(DataParallel):
|
48 |
+
"""
|
49 |
+
Data Parallel with a replication callback.
|
50 |
+
An replication callback `__data_parallel_replicate__` of each module will be invoked after being created by
|
51 |
+
original `replicate` function.
|
52 |
+
The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
|
53 |
+
Examples:
|
54 |
+
> sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
|
55 |
+
> sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
|
56 |
+
# sync_bn.__data_parallel_replicate__ will be invoked.
|
57 |
+
"""
|
58 |
+
|
59 |
+
def replicate(self, module, device_ids):
|
60 |
+
modules = super(DataParallelWithCallback, self).replicate(module, device_ids)
|
61 |
+
execute_replication_callbacks(modules)
|
62 |
+
return modules
|
63 |
+
|
64 |
+
|
65 |
+
def patch_replication_callback(data_parallel):
|
66 |
+
"""
|
67 |
+
Monkey-patch an existing `DataParallel` object. Add the replication callback.
|
68 |
+
Useful when you have customized `DataParallel` implementation.
|
69 |
+
Examples:
|
70 |
+
> sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
|
71 |
+
> sync_bn = DataParallel(sync_bn, device_ids=[0, 1])
|
72 |
+
> patch_replication_callback(sync_bn)
|
73 |
+
# this is equivalent to
|
74 |
+
> sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
|
75 |
+
> sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
|
76 |
+
"""
|
77 |
+
|
78 |
+
assert isinstance(data_parallel, DataParallel)
|
79 |
+
|
80 |
+
old_replicate = data_parallel.replicate
|
81 |
+
|
82 |
+
@functools.wraps(old_replicate)
|
83 |
+
def new_replicate(module, device_ids):
|
84 |
+
modules = old_replicate(module, device_ids)
|
85 |
+
execute_replication_callbacks(modules)
|
86 |
+
return modules
|
87 |
+
|
88 |
+
data_parallel.replicate = new_replicate
|
data/MBD/model/deep_lab_model/sync_batchnorm/unittest.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
# File : unittest.py
|
3 |
+
# Author : Jiayuan Mao
|
4 |
+
# Email : [email protected]
|
5 |
+
# Date : 27/01/2018
|
6 |
+
#
|
7 |
+
# This file is part of Synchronized-BatchNorm-PyTorch.
|
8 |
+
# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
|
9 |
+
# Distributed under MIT License.
|
10 |
+
|
11 |
+
import unittest
|
12 |
+
|
13 |
+
import numpy as np
|
14 |
+
from torch.autograd import Variable
|
15 |
+
|
16 |
+
|
17 |
+
def as_numpy(v):
|
18 |
+
if isinstance(v, Variable):
|
19 |
+
v = v.data
|
20 |
+
return v.cpu().numpy()
|
21 |
+
|
22 |
+
|
23 |
+
class TorchTestCase(unittest.TestCase):
|
24 |
+
def assertTensorClose(self, a, b, atol=1e-3, rtol=1e-3):
|
25 |
+
npa, npb = as_numpy(a), as_numpy(b)
|
26 |
+
self.assertTrue(
|
27 |
+
np.allclose(npa, npb, atol=atol),
|
28 |
+
'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format(a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max())
|
29 |
+
)
|
data/MBD/model/densenetccnl.py
ADDED
@@ -0,0 +1,382 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Densenet decoder encoder with intermediate fully connected layers and dropout
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.backends.cudnn as cudnn
|
5 |
+
import torch.nn as nn
|
6 |
+
import torch.nn.functional as F
|
7 |
+
import functools
|
8 |
+
from torch.autograd import gradcheck
|
9 |
+
from torch.autograd import Function
|
10 |
+
from torch.autograd import Variable
|
11 |
+
from torch.autograd import gradcheck
|
12 |
+
from torch.autograd import Function
|
13 |
+
import numpy as np
|
14 |
+
|
15 |
+
|
16 |
+
def add_coordConv_channels(t):
|
17 |
+
n,c,h,w=t.size()
|
18 |
+
xx_channel=np.ones((h, w))
|
19 |
+
xx_range=np.array(range(h))
|
20 |
+
xx_range=np.expand_dims(xx_range,-1)
|
21 |
+
xx_coord=xx_channel*xx_range
|
22 |
+
yy_coord=xx_coord.transpose()
|
23 |
+
|
24 |
+
xx_coord=xx_coord/(h-1)
|
25 |
+
yy_coord=yy_coord/(h-1)
|
26 |
+
xx_coord=xx_coord*2 - 1
|
27 |
+
yy_coord=yy_coord*2 - 1
|
28 |
+
xx_coord=torch.from_numpy(xx_coord).float()
|
29 |
+
yy_coord=torch.from_numpy(yy_coord).float()
|
30 |
+
|
31 |
+
if t.is_cuda:
|
32 |
+
xx_coord=xx_coord.cuda()
|
33 |
+
yy_coord=yy_coord.cuda()
|
34 |
+
|
35 |
+
xx_coord=xx_coord.unsqueeze(0).unsqueeze(0).repeat(n,1,1,1)
|
36 |
+
yy_coord=yy_coord.unsqueeze(0).unsqueeze(0).repeat(n,1,1,1)
|
37 |
+
|
38 |
+
t_cc=torch.cat((t,xx_coord,yy_coord),dim=1)
|
39 |
+
|
40 |
+
return t_cc
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
class DenseBlockEncoder(nn.Module):
|
45 |
+
def __init__(self, n_channels, n_convs, activation=nn.ReLU, args=[False]):
|
46 |
+
super(DenseBlockEncoder, self).__init__()
|
47 |
+
assert(n_convs > 0)
|
48 |
+
|
49 |
+
self.n_channels = n_channels
|
50 |
+
self.n_convs = n_convs
|
51 |
+
self.layers = nn.ModuleList()
|
52 |
+
for i in range(n_convs):
|
53 |
+
self.layers.append(nn.Sequential(
|
54 |
+
nn.BatchNorm2d(n_channels),
|
55 |
+
activation(*args),
|
56 |
+
nn.Conv2d(n_channels, n_channels, 3, stride=1, padding=1, bias=False),))
|
57 |
+
|
58 |
+
def forward(self, inputs):
|
59 |
+
outputs = []
|
60 |
+
|
61 |
+
for i, layer in enumerate(self.layers):
|
62 |
+
if i > 0:
|
63 |
+
next_output = 0
|
64 |
+
for no in outputs:
|
65 |
+
next_output = next_output + no
|
66 |
+
outputs.append(next_output)
|
67 |
+
else:
|
68 |
+
outputs.append(layer(inputs))
|
69 |
+
return outputs[-1]
|
70 |
+
|
71 |
+
# Dense block in encoder.
|
72 |
+
class DenseBlockDecoder(nn.Module):
|
73 |
+
def __init__(self, n_channels, n_convs, activation=nn.ReLU, args=[False]):
|
74 |
+
super(DenseBlockDecoder, self).__init__()
|
75 |
+
assert(n_convs > 0)
|
76 |
+
|
77 |
+
self.n_channels = n_channels
|
78 |
+
self.n_convs = n_convs
|
79 |
+
self.layers = nn.ModuleList()
|
80 |
+
for i in range(n_convs):
|
81 |
+
self.layers.append(nn.Sequential(
|
82 |
+
nn.BatchNorm2d(n_channels),
|
83 |
+
activation(*args),
|
84 |
+
nn.ConvTranspose2d(n_channels, n_channels, 3, stride=1, padding=1, bias=False),))
|
85 |
+
|
86 |
+
def forward(self, inputs):
|
87 |
+
outputs = []
|
88 |
+
|
89 |
+
for i, layer in enumerate(self.layers):
|
90 |
+
if i > 0:
|
91 |
+
next_output = 0
|
92 |
+
for no in outputs:
|
93 |
+
next_output = next_output + no
|
94 |
+
outputs.append(next_output)
|
95 |
+
else:
|
96 |
+
outputs.append(layer(inputs))
|
97 |
+
return outputs[-1]
|
98 |
+
|
99 |
+
class DenseTransitionBlockEncoder(nn.Module):
|
100 |
+
def __init__(self, n_channels_in, n_channels_out, mp, activation=nn.ReLU, args=[False]):
|
101 |
+
super(DenseTransitionBlockEncoder, self).__init__()
|
102 |
+
self.n_channels_in = n_channels_in
|
103 |
+
self.n_channels_out = n_channels_out
|
104 |
+
self.mp = mp
|
105 |
+
self.main = nn.Sequential(
|
106 |
+
nn.BatchNorm2d(n_channels_in),
|
107 |
+
activation(*args),
|
108 |
+
nn.Conv2d(n_channels_in, n_channels_out, 1, stride=1, padding=0, bias=False),
|
109 |
+
nn.MaxPool2d(mp),
|
110 |
+
)
|
111 |
+
def forward(self, inputs):
|
112 |
+
# print(inputs.shape,'222222222222222',self.main(inputs).shape)
|
113 |
+
return self.main(inputs)
|
114 |
+
|
115 |
+
|
116 |
+
class DenseTransitionBlockDecoder(nn.Module):
|
117 |
+
def __init__(self, n_channels_in, n_channels_out, activation=nn.ReLU, args=[False]):
|
118 |
+
super(DenseTransitionBlockDecoder, self).__init__()
|
119 |
+
self.n_channels_in = n_channels_in
|
120 |
+
self.n_channels_out = n_channels_out
|
121 |
+
self.main = nn.Sequential(
|
122 |
+
nn.BatchNorm2d(n_channels_in),
|
123 |
+
activation(*args),
|
124 |
+
nn.ConvTranspose2d(n_channels_in, n_channels_out, 4, stride=2, padding=1, bias=False),
|
125 |
+
)
|
126 |
+
def forward(self, inputs):
|
127 |
+
# print(inputs.shape,'333333333333',self.main(inputs).shape)
|
128 |
+
return self.main(inputs)
|
129 |
+
|
130 |
+
## Dense encoders and decoders for image of size 128 128
|
131 |
+
class waspDenseEncoder128(nn.Module):
|
132 |
+
def __init__(self, nc=1, ndf = 32, ndim = 128, activation=nn.LeakyReLU, args=[0.2, False], f_activation=nn.Tanh, f_args=[]):
|
133 |
+
super(waspDenseEncoder128, self).__init__()
|
134 |
+
self.ndim = ndim
|
135 |
+
|
136 |
+
self.main = nn.Sequential(
|
137 |
+
# input is (nc) x 128 x 128
|
138 |
+
nn.BatchNorm2d(nc),
|
139 |
+
nn.ReLU(True),
|
140 |
+
nn.Conv2d(nc, ndf, 4, stride=2, padding=1),
|
141 |
+
|
142 |
+
# state size. (ndf) x 64 x 64
|
143 |
+
DenseBlockEncoder(ndf, 6),
|
144 |
+
DenseTransitionBlockEncoder(ndf, ndf*2, 2, activation=activation, args=args),
|
145 |
+
|
146 |
+
# state size. (ndf*2) x 32 x 32
|
147 |
+
DenseBlockEncoder(ndf*2, 12),
|
148 |
+
DenseTransitionBlockEncoder(ndf*2, ndf*4, 2, activation=activation, args=args),
|
149 |
+
|
150 |
+
# state size. (ndf*4) x 16 x 16
|
151 |
+
DenseBlockEncoder(ndf*4, 16),
|
152 |
+
DenseTransitionBlockEncoder(ndf*4, ndf*8, 2, activation=activation, args=args),
|
153 |
+
|
154 |
+
# state size. (ndf*4) x 8 x 8
|
155 |
+
DenseBlockEncoder(ndf*8, 16),
|
156 |
+
DenseTransitionBlockEncoder(ndf*8, ndf*8, 2, activation=activation, args=args),
|
157 |
+
|
158 |
+
# state size. (ndf*8) x 4 x 4
|
159 |
+
DenseBlockEncoder(ndf*8, 16),
|
160 |
+
DenseTransitionBlockEncoder(ndf*8, ndim, 4, activation=activation, args=args),
|
161 |
+
f_activation(*f_args),
|
162 |
+
)
|
163 |
+
|
164 |
+
def forward(self, input):
|
165 |
+
input=add_coordConv_channels(input)
|
166 |
+
output = self.main(input).view(-1,self.ndim)
|
167 |
+
#print(output.size())
|
168 |
+
return output
|
169 |
+
|
170 |
+
class waspDenseDecoder128(nn.Module):
|
171 |
+
def __init__(self, nz=128, nc=1, ngf=32, lb=0, ub=1, activation=nn.ReLU, args=[False], f_activation=nn.Hardtanh, f_args=[]):
|
172 |
+
super(waspDenseDecoder128, self).__init__()
|
173 |
+
self.main = nn.Sequential(
|
174 |
+
# input is Z, going into convolution
|
175 |
+
nn.BatchNorm2d(nz),
|
176 |
+
activation(*args),
|
177 |
+
nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
|
178 |
+
|
179 |
+
# state size. (ngf*8) x 4 x 4
|
180 |
+
DenseBlockDecoder(ngf*8, 16),
|
181 |
+
DenseTransitionBlockDecoder(ngf*8, ngf*8),
|
182 |
+
|
183 |
+
# state size. (ngf*4) x 8 x 8
|
184 |
+
DenseBlockDecoder(ngf*8, 16),
|
185 |
+
DenseTransitionBlockDecoder(ngf*8, ngf*4),
|
186 |
+
|
187 |
+
# state size. (ngf*2) x 16 x 16
|
188 |
+
DenseBlockDecoder(ngf*4, 12),
|
189 |
+
DenseTransitionBlockDecoder(ngf*4, ngf*2),
|
190 |
+
|
191 |
+
# state size. (ngf) x 32 x 32
|
192 |
+
DenseBlockDecoder(ngf*2, 6),
|
193 |
+
DenseTransitionBlockDecoder(ngf*2, ngf),
|
194 |
+
|
195 |
+
# state size. (ngf) x 64 x 64
|
196 |
+
DenseBlockDecoder(ngf, 6),
|
197 |
+
DenseTransitionBlockDecoder(ngf, ngf),
|
198 |
+
|
199 |
+
# state size (ngf) x 128 x 128
|
200 |
+
nn.BatchNorm2d(ngf),
|
201 |
+
activation(*args),
|
202 |
+
nn.ConvTranspose2d(ngf, nc, 3, stride=1, padding=1, bias=False),
|
203 |
+
f_activation(*f_args),
|
204 |
+
)
|
205 |
+
# self.smooth=nn.Sequential(
|
206 |
+
# nn.Conv2d(nc, nc, 1, stride=1, padding=0, bias=False),
|
207 |
+
# f_activation(*f_args),
|
208 |
+
# )
|
209 |
+
def forward(self, inputs):
|
210 |
+
# return self.smooth(self.main(inputs))
|
211 |
+
return self.main(inputs)
|
212 |
+
|
213 |
+
|
214 |
+
|
215 |
+
## Dense encoders and decoders for image of size 512 512
|
216 |
+
class waspDenseEncoder512(nn.Module):
|
217 |
+
def __init__(self, nc=1, ndf = 32, ndim = 128, activation=nn.LeakyReLU, args=[0.2, False], f_activation=nn.Tanh, f_args=[]):
|
218 |
+
super(waspDenseEncoder512, self).__init__()
|
219 |
+
self.ndim = ndim
|
220 |
+
|
221 |
+
self.main = nn.Sequential(
|
222 |
+
# input is (nc) x 128 x 128 > *4
|
223 |
+
nn.BatchNorm2d(nc),
|
224 |
+
nn.ReLU(True),
|
225 |
+
nn.Conv2d(nc, ndf, 4, stride=2, padding=1),
|
226 |
+
|
227 |
+
# state size. (ndf) x 64 x 64 > *4
|
228 |
+
DenseBlockEncoder(ndf, 6),
|
229 |
+
DenseTransitionBlockEncoder(ndf, ndf*2, 2, activation=activation, args=args),
|
230 |
+
|
231 |
+
# state size. (ndf*2) x 32 x 32 > *4
|
232 |
+
DenseBlockEncoder(ndf*2, 12),
|
233 |
+
DenseTransitionBlockEncoder(ndf*2, ndf*4, 2, activation=activation, args=args),
|
234 |
+
|
235 |
+
# state size. (ndf*4) x 16 x 16 > *4
|
236 |
+
DenseBlockEncoder(ndf*4, 16),
|
237 |
+
DenseTransitionBlockEncoder(ndf*4, ndf*8, 2, activation=activation, args=args),
|
238 |
+
|
239 |
+
# state size. (ndf*8) x 8 x 8 *4
|
240 |
+
DenseBlockEncoder(ndf*8, 16),
|
241 |
+
DenseTransitionBlockEncoder(ndf*8, ndf*8, 2, activation=activation, args=args),
|
242 |
+
|
243 |
+
# state size. (ndf*8) x 4 x 4 > *4
|
244 |
+
DenseBlockEncoder(ndf*8, 16),
|
245 |
+
DenseTransitionBlockEncoder(ndf*8, ndf*8, 4, activation=activation, args=args),
|
246 |
+
f_activation(*f_args),
|
247 |
+
|
248 |
+
# state size. (ndf*8) x 2 x 2 > *4
|
249 |
+
DenseBlockEncoder(ndf*8, 16),
|
250 |
+
DenseTransitionBlockEncoder(ndf*8, ndim, 4, activation=activation, args=args),
|
251 |
+
f_activation(*f_args),
|
252 |
+
)
|
253 |
+
|
254 |
+
def forward(self, input):
|
255 |
+
input=add_coordConv_channels(input)
|
256 |
+
output = self.main(input).view(-1,self.ndim)
|
257 |
+
# output = self.main(input).view(8,-1)
|
258 |
+
# print(input.shape,'---------------------')
|
259 |
+
#print(output.size())
|
260 |
+
return output
|
261 |
+
|
262 |
+
class waspDenseDecoder512(nn.Module):
|
263 |
+
def __init__(self, nz=128, nc=1, ngf=32, lb=0, ub=1, activation=nn.ReLU, args=[False], f_activation=nn.Tanh, f_args=[]):
|
264 |
+
super(waspDenseDecoder512, self).__init__()
|
265 |
+
self.main = nn.Sequential(
|
266 |
+
# input is Z, going into convolution
|
267 |
+
nn.BatchNorm2d(nz),
|
268 |
+
activation(*args),
|
269 |
+
nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
|
270 |
+
|
271 |
+
# state size. (ngf*8) x 4 x 4
|
272 |
+
DenseBlockDecoder(ngf*8, 16),
|
273 |
+
DenseTransitionBlockDecoder(ngf*8, ngf*8),
|
274 |
+
|
275 |
+
# state size. (ngf*8) x 8 x 8
|
276 |
+
DenseBlockDecoder(ngf*8, 16),
|
277 |
+
DenseTransitionBlockDecoder(ngf*8, ngf*8),
|
278 |
+
|
279 |
+
# state size. (ngf*4) x 16 x 16
|
280 |
+
DenseBlockDecoder(ngf*8, 16),
|
281 |
+
DenseTransitionBlockDecoder(ngf*8, ngf*4),
|
282 |
+
|
283 |
+
# state size. (ngf*2) x 32 x 32
|
284 |
+
DenseBlockDecoder(ngf*4, 12),
|
285 |
+
DenseTransitionBlockDecoder(ngf*4, ngf*2),
|
286 |
+
|
287 |
+
# state size. (ngf) x 64 x 64
|
288 |
+
DenseBlockDecoder(ngf*2, 6),
|
289 |
+
DenseTransitionBlockDecoder(ngf*2, ngf),
|
290 |
+
|
291 |
+
# state size. (ngf) x 128 x 128
|
292 |
+
DenseBlockDecoder(ngf, 6),
|
293 |
+
DenseTransitionBlockDecoder(ngf, ngf),
|
294 |
+
|
295 |
+
# state size. (ngf) x 256 x 256
|
296 |
+
DenseBlockDecoder(ngf, 6),
|
297 |
+
DenseTransitionBlockDecoder(ngf, ngf),
|
298 |
+
|
299 |
+
# state size (ngf) x 512 x 512
|
300 |
+
nn.BatchNorm2d(ngf),
|
301 |
+
activation(*args),
|
302 |
+
nn.ConvTranspose2d(ngf, nc, 3, stride=1, padding=1, bias=False),
|
303 |
+
f_activation(*f_args),
|
304 |
+
)
|
305 |
+
# self.smooth=nn.Sequential(
|
306 |
+
# nn.Conv2d(nc, nc, 1, stride=1, padding=0, bias=False),
|
307 |
+
# f_activation(*f_args),
|
308 |
+
# )
|
309 |
+
def forward(self, inputs):
|
310 |
+
# return self.smooth(self.main(inputs))
|
311 |
+
return self.main(inputs)
|
312 |
+
|
313 |
+
|
314 |
+
class dnetccnl(nn.Module):
|
315 |
+
#in_channels -> nc | encoder first layer
|
316 |
+
#filters -> ndf | encoder first layer
|
317 |
+
#img_size(h,w) -> ndim
|
318 |
+
#out_channels -> optical flow (x,y)
|
319 |
+
|
320 |
+
def __init__(self, img_size=448, in_channels=3, out_channels=2, filters=32,fc_units=100):
|
321 |
+
super(dnetccnl, self).__init__()
|
322 |
+
self.nc=in_channels
|
323 |
+
self.nf=filters
|
324 |
+
self.ndim=img_size
|
325 |
+
self.oc=out_channels
|
326 |
+
self.fcu=fc_units
|
327 |
+
|
328 |
+
self.encoder=waspDenseEncoder128(nc=self.nc+2,ndf=self.nf,ndim=self.ndim)
|
329 |
+
self.decoder=waspDenseDecoder128(nz=self.ndim,nc=self.oc,ngf=self.nf)
|
330 |
+
# self.fc_layers= nn.Sequential(nn.Linear(self.ndim, self.fcu),
|
331 |
+
# nn.ReLU(True),
|
332 |
+
# nn.Dropout(0.25),
|
333 |
+
# nn.Linear(self.fcu,self.ndim),
|
334 |
+
# nn.ReLU(True),
|
335 |
+
# nn.Dropout(0.25),
|
336 |
+
# )
|
337 |
+
|
338 |
+
def forward(self, inputs):
|
339 |
+
|
340 |
+
encoded=self.encoder(inputs)
|
341 |
+
encoded=encoded.unsqueeze(-1).unsqueeze(-1)
|
342 |
+
decoded=self.decoder(encoded)
|
343 |
+
# print torch.max(decoded)
|
344 |
+
# print torch.min(decoded)
|
345 |
+
# print(decoded.shape,'11111111111111111',encoded.shape)
|
346 |
+
|
347 |
+
return decoded
|
348 |
+
|
349 |
+
class dnetccnl512(nn.Module):
|
350 |
+
#in_channels -> nc | encoder first layer
|
351 |
+
#filters -> ndf | encoder first layer
|
352 |
+
#img_size(h,w) -> ndim
|
353 |
+
#out_channels -> optical flow (x,y)
|
354 |
+
|
355 |
+
def __init__(self, img_size=448, in_channels=3, out_channels=2, filters=32,fc_units=100):
|
356 |
+
super(dnetccnl512, self).__init__()
|
357 |
+
self.nc=in_channels
|
358 |
+
self.nf=filters
|
359 |
+
self.ndim=img_size
|
360 |
+
self.oc=out_channels
|
361 |
+
self.fcu=fc_units
|
362 |
+
|
363 |
+
self.encoder=waspDenseEncoder512(nc=self.nc+2,ndf=self.nf,ndim=self.ndim)
|
364 |
+
self.decoder=waspDenseDecoder512(nz=self.ndim,nc=self.oc,ngf=self.nf)
|
365 |
+
# self.fc_layers= nn.Sequential(nn.Linear(self.ndim, self.fcu),
|
366 |
+
# nn.ReLU(True),
|
367 |
+
# nn.Dropout(0.25),
|
368 |
+
# nn.Linear(self.fcu,self.ndim),
|
369 |
+
# nn.ReLU(True),
|
370 |
+
# nn.Dropout(0.25),
|
371 |
+
# )
|
372 |
+
|
373 |
+
def forward(self, inputs):
|
374 |
+
|
375 |
+
encoded=self.encoder(inputs)
|
376 |
+
encoded=encoded.unsqueeze(-1).unsqueeze(-1)
|
377 |
+
decoded=self.decoder(encoded)
|
378 |
+
# print torch.max(decoded)
|
379 |
+
# print torch.min(decoded)
|
380 |
+
# print(decoded.shape,'11111111111111111',encoded.shape)
|
381 |
+
|
382 |
+
return decoded
|
data/MBD/model/gienet.py
ADDED
@@ -0,0 +1,742 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from math import log
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
from torch.nn import init
|
5 |
+
import functools
|
6 |
+
from model.cbam import CBAM
|
7 |
+
# Defines the Unet generator.
|
8 |
+
# |num_downs|: number of downsamplings in UNet. For example,
|
9 |
+
# if |num_downs| == 7, image of size 128x128 will become of size 1x1
|
10 |
+
# at the bottleneck
|
11 |
+
class SingleConv(nn.Module):
|
12 |
+
"""(convolution => [BN] => ReLU) * 2"""
|
13 |
+
|
14 |
+
def __init__(self, in_channels, out_channels):
|
15 |
+
super().__init__()
|
16 |
+
self.double_conv = nn.Sequential(
|
17 |
+
nn.ReflectionPad2d(1),
|
18 |
+
nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=0,stride=1),
|
19 |
+
nn.BatchNorm2d(out_channels),
|
20 |
+
nn.ReLU(inplace=True),
|
21 |
+
# nn.ReflectionPad2d(1),
|
22 |
+
# nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=0,stride=1),
|
23 |
+
# nn.BatchNorm2d(out_channels),
|
24 |
+
# nn.ReLU(inplace=True)
|
25 |
+
)
|
26 |
+
|
27 |
+
def forward(self, x):
|
28 |
+
return self.double_conv(x)
|
29 |
+
class Down_single(nn.Module):
|
30 |
+
"""Downscaling with maxpool then double conv"""
|
31 |
+
|
32 |
+
def __init__(self, in_channels, out_channels):
|
33 |
+
super().__init__()
|
34 |
+
self.maxpool_conv = nn.Sequential(
|
35 |
+
nn.MaxPool2d(2),
|
36 |
+
SingleConv(in_channels, out_channels)
|
37 |
+
)
|
38 |
+
|
39 |
+
def forward(self, x):
|
40 |
+
return self.maxpool_conv(x)
|
41 |
+
class Up_single(nn.Module):
|
42 |
+
"""Upscaling then double conv"""
|
43 |
+
def __init__(self, in_channels, out_channels, bilinear=True):
|
44 |
+
super().__init__()
|
45 |
+
self.up = nn.Upsample(scale_factor=2, mode='nearest')
|
46 |
+
self.conv = SingleConv(in_channels, out_channels)
|
47 |
+
self.deconv = nn.ConvTranspose2d(in_channels, out_channels,kernel_size=4, stride=2,padding=1, bias=True)
|
48 |
+
def forward(self, x1, x2):
|
49 |
+
x1 = self.deconv(x1)
|
50 |
+
# input is BCHW
|
51 |
+
x = torch.cat([x2, x1], dim=1)
|
52 |
+
return self.conv(x)
|
53 |
+
class DoubleConv(nn.Module):
|
54 |
+
"""(convolution => [BN] => ReLU) * 2"""
|
55 |
+
|
56 |
+
def __init__(self, in_channels, out_channels):
|
57 |
+
super().__init__()
|
58 |
+
self.double_conv = nn.Sequential(
|
59 |
+
nn.ReflectionPad2d(1),
|
60 |
+
nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=0,stride=1),
|
61 |
+
nn.BatchNorm2d(out_channels),
|
62 |
+
nn.ReLU(inplace=True),
|
63 |
+
nn.ReflectionPad2d(1),
|
64 |
+
nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=0,stride=1),
|
65 |
+
nn.BatchNorm2d(out_channels),
|
66 |
+
nn.ReLU(inplace=True)
|
67 |
+
)
|
68 |
+
|
69 |
+
def forward(self, x):
|
70 |
+
return self.double_conv(x)
|
71 |
+
class Down(nn.Module):
|
72 |
+
"""Downscaling with maxpool then double conv"""
|
73 |
+
|
74 |
+
def __init__(self, in_channels, out_channels):
|
75 |
+
super().__init__()
|
76 |
+
self.maxpool_conv = nn.Sequential(
|
77 |
+
nn.MaxPool2d(2),
|
78 |
+
DoubleConv(in_channels, out_channels)
|
79 |
+
)
|
80 |
+
|
81 |
+
def forward(self, x):
|
82 |
+
return self.maxpool_conv(x)
|
83 |
+
class Up(nn.Module):
|
84 |
+
"""Upscaling then double conv"""
|
85 |
+
def __init__(self, in_channels, out_channels, bilinear=True):
|
86 |
+
super().__init__()
|
87 |
+
self.up = nn.Upsample(scale_factor=2, mode='nearest')
|
88 |
+
self.conv = DoubleConv(in_channels, out_channels)
|
89 |
+
self.deconv = nn.ConvTranspose2d(in_channels, out_channels,kernel_size=4, stride=2,padding=1, bias=True)
|
90 |
+
def forward(self, x1, x2):
|
91 |
+
x1 = self.deconv(x1)
|
92 |
+
# input is BCHW
|
93 |
+
x = torch.cat([x2, x1], dim=1)
|
94 |
+
return self.conv(x)
|
95 |
+
|
96 |
+
class OutConv(nn.Module):
|
97 |
+
def __init__(self, in_channels, out_channels):
|
98 |
+
super(OutConv, self).__init__()
|
99 |
+
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
|
100 |
+
self.tanh = nn.Tanh()
|
101 |
+
self.hardtanh = nn.Hardtanh()
|
102 |
+
self.sigmoid = nn.Sigmoid()
|
103 |
+
|
104 |
+
def forward(self, x1):
|
105 |
+
x = self.conv(x1)
|
106 |
+
# x = self.sigmoid(x)
|
107 |
+
# x = self.hardtanh(x)
|
108 |
+
# x = (x+1)/2
|
109 |
+
return x
|
110 |
+
class GiemaskGenerator(nn.Module):
|
111 |
+
"""Create a Unet-based generator"""
|
112 |
+
|
113 |
+
def __init__(self, input_nc, output_nc, num_downs, ngf=64, biline=True, norm_layer=nn.BatchNorm2d, use_dropout=False):
|
114 |
+
"""Construct a Unet generator
|
115 |
+
Parameters:
|
116 |
+
input_nc (int) -- the number of channels in input images
|
117 |
+
output_nc (int) -- the number of channels in output images
|
118 |
+
num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7,
|
119 |
+
image of size 128x128 will become of size 1x1 # at the bottleneck
|
120 |
+
ngf (int) -- the number of filters in the last conv layer
|
121 |
+
norm_layer -- normalization layer
|
122 |
+
|
123 |
+
We construct the U-Net from the innermost layer to the outermost layer.
|
124 |
+
It is a recursive process.
|
125 |
+
"""
|
126 |
+
super(GiemaskGenerator, self).__init__()
|
127 |
+
self.init_channel =32
|
128 |
+
self.inc = DoubleConv(3,self.init_channel)
|
129 |
+
self.down1 = Down(self.init_channel, self.init_channel*2)
|
130 |
+
self.down2 = Down(self.init_channel*2, self.init_channel*4)
|
131 |
+
self.down3 = Down(self.init_channel*4, self.init_channel*8)
|
132 |
+
self.down4 = Down(self.init_channel*8, self.init_channel*16)
|
133 |
+
self.down5 = Down(self.init_channel*16, self.init_channel*32)
|
134 |
+
|
135 |
+
self.up1 = Up(self.init_channel*32, self.init_channel*16)
|
136 |
+
self.up2 = Up(self.init_channel*16, self.init_channel*8)
|
137 |
+
self.up3 = Up(self.init_channel*8, self.init_channel*4)
|
138 |
+
self.up4 = Up(self.init_channel*4,self.init_channel*2)
|
139 |
+
self.up5 = Up(self.init_channel*2, self.init_channel)
|
140 |
+
self.outc = OutConv(self.init_channel, 1)
|
141 |
+
self.up1_1 = Up_single(self.init_channel*32, self.init_channel*16)
|
142 |
+
self.up2_1 = Up_single(self.init_channel*16, self.init_channel*8)
|
143 |
+
self.up3_1 = Up_single(self.init_channel*8, self.init_channel*4)
|
144 |
+
self.up4_1 = Up_single(self.init_channel*4,self.init_channel*2)
|
145 |
+
self.up5_1 = Up_single(self.init_channel*2, self.init_channel)
|
146 |
+
self.outc_1 = OutConv(self.init_channel, 1)
|
147 |
+
# self.dropout = nn.Dropout(p=0.5)
|
148 |
+
def forward(self, input):
|
149 |
+
x1 = self.inc(input)
|
150 |
+
x2 = self.down1(x1)
|
151 |
+
x3 = self.down2(x2)
|
152 |
+
x4 = self.down3(x3)
|
153 |
+
x5 = self.down4(x4)
|
154 |
+
x6 = self.down5(x5)
|
155 |
+
|
156 |
+
|
157 |
+
x_1 = self.up1_1(x6, x5)
|
158 |
+
x_1 = self.up2_1(x_1, x4)
|
159 |
+
x_1 = self.up3_1(x_1, x3)
|
160 |
+
x_1 = self.up4_1(x_1, x2)
|
161 |
+
x_1 = self.up5_1(x_1, x1)
|
162 |
+
mask = self.outc_1(x_1)
|
163 |
+
|
164 |
+
x = self.up1(x6, x5)
|
165 |
+
# x = self.dropout(x)
|
166 |
+
x = self.up2(x, x4)
|
167 |
+
# x = self.dropout(x)
|
168 |
+
x = self.up3(x, x3)
|
169 |
+
# x = self.dropout(x)
|
170 |
+
x = self.up4(x, x2)
|
171 |
+
# x = self.dropout(x)
|
172 |
+
x = self.up5(x, x1)
|
173 |
+
# x = self.dropout(x)
|
174 |
+
depth = self.outc(x)
|
175 |
+
return depth,mask
|
176 |
+
"""Create a Unet-based generator"""
|
177 |
+
class Giemask2Generator(nn.Module):
|
178 |
+
"""Create a Unet-based generator"""
|
179 |
+
|
180 |
+
def __init__(self, input_nc, output_nc, num_downs, ngf=64, biline=True, norm_layer=nn.BatchNorm2d, use_dropout=False):
|
181 |
+
"""Construct a Unet generator
|
182 |
+
Parameters:
|
183 |
+
input_nc (int) -- the number of channels in input images
|
184 |
+
output_nc (int) -- the number of channels in output images
|
185 |
+
num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7,
|
186 |
+
image of size 128x128 will become of size 1x1 # at the bottleneck
|
187 |
+
ngf (int) -- the number of filters in the last conv layer
|
188 |
+
norm_layer -- normalization layer
|
189 |
+
|
190 |
+
We construct the U-Net from the innermost layer to the outermost layer.
|
191 |
+
It is a recursive process.
|
192 |
+
"""
|
193 |
+
super(Giemask2Generator, self).__init__()
|
194 |
+
self.init_channel =32
|
195 |
+
self.inc = DoubleConv(3,self.init_channel)
|
196 |
+
self.down1 = Down(self.init_channel, self.init_channel*2)
|
197 |
+
self.down2 = Down(self.init_channel*2, self.init_channel*4)
|
198 |
+
self.down3 = Down(self.init_channel*4, self.init_channel*8)
|
199 |
+
self.down4 = Down(self.init_channel*8, self.init_channel*16)
|
200 |
+
self.down5 = Down(self.init_channel*16, self.init_channel*32)
|
201 |
+
|
202 |
+
self.up1 = Up(self.init_channel*32, self.init_channel*16)
|
203 |
+
self.up2 = Up(self.init_channel*16, self.init_channel*8)
|
204 |
+
self.up3 = Up(self.init_channel*8, self.init_channel*4)
|
205 |
+
self.up4 = Up(self.init_channel*4,self.init_channel*2)
|
206 |
+
self.up5 = Up(self.init_channel*2, self.init_channel)
|
207 |
+
self.outc = OutConv(self.init_channel, 1)
|
208 |
+
self.up1_1 = Up_single(self.init_channel*32, self.init_channel*16)
|
209 |
+
self.up2_1 = Up_single(self.init_channel*16, self.init_channel*8)
|
210 |
+
self.up3_1 = Up_single(self.init_channel*8, self.init_channel*4)
|
211 |
+
self.up4_1 = Up_single(self.init_channel*4,self.init_channel*2)
|
212 |
+
self.up5_1 = Up_single(self.init_channel*2, self.init_channel)
|
213 |
+
self.outc_1 = OutConv(self.init_channel, 1)
|
214 |
+
self.outc_2 = OutConv(self.init_channel, 1)
|
215 |
+
# self.dropout = nn.Dropout(p=0.5)
|
216 |
+
def forward(self, input):
|
217 |
+
x1 = self.inc(input)
|
218 |
+
x2 = self.down1(x1)
|
219 |
+
x3 = self.down2(x2)
|
220 |
+
x4 = self.down3(x3)
|
221 |
+
x5 = self.down4(x4)
|
222 |
+
x6 = self.down5(x5)
|
223 |
+
|
224 |
+
|
225 |
+
x_1 = self.up1_1(x6, x5)
|
226 |
+
x_1 = self.up2_1(x_1, x4)
|
227 |
+
x_1 = self.up3_1(x_1, x3)
|
228 |
+
x_1 = self.up4_1(x_1, x2)
|
229 |
+
x_1 = self.up5_1(x_1, x1)
|
230 |
+
mask = self.outc_1(x_1)
|
231 |
+
edge = self.outc_2(x_1)
|
232 |
+
|
233 |
+
x = self.up1(x6, x5)
|
234 |
+
# x = self.dropout(x)
|
235 |
+
x = self.up2(x, x4)
|
236 |
+
# x = self.dropout(x)
|
237 |
+
x = self.up3(x, x3)
|
238 |
+
# x = self.dropout(x)
|
239 |
+
x = self.up4(x, x2)
|
240 |
+
# x = self.dropout(x)
|
241 |
+
x = self.up5(x, x1)
|
242 |
+
# x = self.dropout(x)
|
243 |
+
depth = self.outc(x)
|
244 |
+
return depth,mask,edge
|
245 |
+
"""Create a Unet-based generator"""
|
246 |
+
class GieGenerator(nn.Module):
|
247 |
+
def __init__(self, input_nc, output_nc, num_downs, ngf=64, biline=True, norm_layer=nn.BatchNorm2d, use_dropout=False):
|
248 |
+
"""Construct a Unet generator
|
249 |
+
Parameters:
|
250 |
+
input_nc (int) -- the number of channels in input images
|
251 |
+
output_nc (int) -- the number of channels in output images
|
252 |
+
num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7,
|
253 |
+
image of size 128x128 will become of size 1x1 # at the bottleneck
|
254 |
+
ngf (int) -- the number of filters in the last conv layer
|
255 |
+
norm_layer -- normalization layer
|
256 |
+
|
257 |
+
We construct the U-Net from the innermost layer to the outermost layer.
|
258 |
+
It is a recursive process.
|
259 |
+
"""
|
260 |
+
super(GieGenerator, self).__init__()
|
261 |
+
self.init_channel =32
|
262 |
+
self.inc = DoubleConv(input_nc,self.init_channel)
|
263 |
+
self.down1 = Down(self.init_channel, self.init_channel*2)
|
264 |
+
self.down2 = Down(self.init_channel*2, self.init_channel*4)
|
265 |
+
self.down3 = Down(self.init_channel*4, self.init_channel*8)
|
266 |
+
self.down4 = Down(self.init_channel*8, self.init_channel*16)
|
267 |
+
self.down5 = Down(self.init_channel*16, self.init_channel*32)
|
268 |
+
|
269 |
+
self.up1 = Up(self.init_channel*32, self.init_channel*16)
|
270 |
+
self.up2 = Up(self.init_channel*16, self.init_channel*8)
|
271 |
+
self.up3 = Up(self.init_channel*8, self.init_channel*4)
|
272 |
+
self.up4 = Up(self.init_channel*4,self.init_channel*2)
|
273 |
+
self.up5 = Up(self.init_channel*2, self.init_channel)
|
274 |
+
self.outc = OutConv(self.init_channel, 2)
|
275 |
+
# self.dropout = nn.Dropout(p=0.5)
|
276 |
+
def forward(self, input):
|
277 |
+
x1 = self.inc(input)
|
278 |
+
x2 = self.down1(x1)
|
279 |
+
x3 = self.down2(x2)
|
280 |
+
x4 = self.down3(x3)
|
281 |
+
x5 = self.down4(x4)
|
282 |
+
x6 = self.down5(x5)
|
283 |
+
|
284 |
+
x = self.up1(x6, x5)
|
285 |
+
# x = self.dropout(x)
|
286 |
+
x = self.up2(x, x4)
|
287 |
+
# x = self.dropout(x)
|
288 |
+
x = self.up3(x, x3)
|
289 |
+
# x = self.dropout(x)
|
290 |
+
x = self.up4(x, x2)
|
291 |
+
# x = self.dropout(x)
|
292 |
+
x = self.up5(x, x1)
|
293 |
+
# x = self.dropout(x)
|
294 |
+
logits1 = self.outc(x)
|
295 |
+
return logits1
|
296 |
+
|
297 |
+
|
298 |
+
class GiecbamGenerator(nn.Module):
|
299 |
+
def __init__(self, input_nc, output_nc, num_downs, ngf=64, biline=True, norm_layer=nn.BatchNorm2d, use_dropout=False):
|
300 |
+
"""Construct a Unet generator
|
301 |
+
Parameters:
|
302 |
+
input_nc (int) -- the number of channels in input images
|
303 |
+
output_nc (int) -- the number of channels in output images
|
304 |
+
num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7,
|
305 |
+
image of size 128x128 will become of size 1x1 # at the bottleneck
|
306 |
+
ngf (int) -- the number of filters in the last conv layer
|
307 |
+
norm_layer -- normalization layer
|
308 |
+
|
309 |
+
We construct the U-Net from the innermost layer to the outermost layer.
|
310 |
+
It is a recursive process.
|
311 |
+
"""
|
312 |
+
super(GiecbamGenerator, self).__init__()
|
313 |
+
self.init_channel =32
|
314 |
+
self.inc = DoubleConv(input_nc,self.init_channel)
|
315 |
+
self.down1 = Down(self.init_channel, self.init_channel*2)
|
316 |
+
self.down2 = Down(self.init_channel*2, self.init_channel*4)
|
317 |
+
self.down3 = Down(self.init_channel*4, self.init_channel*8)
|
318 |
+
self.down4 = Down(self.init_channel*8, self.init_channel*16)
|
319 |
+
self.down5 = Down(self.init_channel*16, self.init_channel*32)
|
320 |
+
self.cbam = CBAM(gate_channels=self.init_channel*32)
|
321 |
+
self.up1 = Up(self.init_channel*32, self.init_channel*16)
|
322 |
+
self.up2 = Up(self.init_channel*16, self.init_channel*8)
|
323 |
+
self.up3 = Up(self.init_channel*8, self.init_channel*4)
|
324 |
+
self.up4 = Up(self.init_channel*4,self.init_channel*2)
|
325 |
+
self.up5 = Up(self.init_channel*2, self.init_channel)
|
326 |
+
self.outc = OutConv(self.init_channel, 2)
|
327 |
+
self.dropout = nn.Dropout(p=0.1)
|
328 |
+
def forward(self, input):
|
329 |
+
x1 = self.inc(input)
|
330 |
+
x2 = self.down1(x1)
|
331 |
+
x3 = self.down2(x2)
|
332 |
+
x4 = self.down3(x3)
|
333 |
+
x5 = self.down4(x4)
|
334 |
+
x6 = self.down5(x5)
|
335 |
+
x6 = self.cbam(x6)
|
336 |
+
x = self.up1(x6, x5)
|
337 |
+
x = self.up2(x, x4)
|
338 |
+
x = self.up3(x, x3)
|
339 |
+
x = self.up4(x, x2)
|
340 |
+
x = self.up5(x, x1)
|
341 |
+
x = self.dropout(x)
|
342 |
+
logits1 = self.outc(x)
|
343 |
+
return logits1
|
344 |
+
|
345 |
+
|
346 |
+
|
347 |
+
|
348 |
+
class Gie2headGenerator(nn.Module):
|
349 |
+
def __init__(self, input_nc, output_nc, num_downs, ngf=64, biline=True, norm_layer=nn.BatchNorm2d, use_dropout=False):
|
350 |
+
"""Construct a Unet generator
|
351 |
+
Parameters:
|
352 |
+
input_nc (int) -- the number of channels in input images
|
353 |
+
output_nc (int) -- the number of channels in output images
|
354 |
+
num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7,
|
355 |
+
image of size 128x128 will become of size 1x1 # at the bottleneck
|
356 |
+
ngf (int) -- the number of filters in the last conv layer
|
357 |
+
norm_layer -- normalization layer
|
358 |
+
|
359 |
+
We construct the U-Net from the innermost layer to the outermost layer.
|
360 |
+
It is a recursive process.
|
361 |
+
"""
|
362 |
+
super(Gie2headGenerator, self).__init__()
|
363 |
+
self.init_channel =32
|
364 |
+
self.inc = DoubleConv(input_nc,self.init_channel)
|
365 |
+
self.down1 = Down(self.init_channel, self.init_channel*2)
|
366 |
+
self.down2 = Down(self.init_channel*2, self.init_channel*4)
|
367 |
+
self.down3 = Down(self.init_channel*4, self.init_channel*8)
|
368 |
+
self.down4 = Down(self.init_channel*8, self.init_channel*16)
|
369 |
+
self.down5 = Down(self.init_channel*16, self.init_channel*32)
|
370 |
+
|
371 |
+
self.up1_1 = Up(self.init_channel*32, self.init_channel*16)
|
372 |
+
self.up2_1 = Up(self.init_channel*16, self.init_channel*8)
|
373 |
+
self.up3_1 = Up(self.init_channel*8, self.init_channel*4)
|
374 |
+
self.up4_1 = Up(self.init_channel*4,self.init_channel*2)
|
375 |
+
self.up5_1 = Up(self.init_channel*2, self.init_channel)
|
376 |
+
self.outc_1 = OutConv(self.init_channel, 1)
|
377 |
+
|
378 |
+
self.up1_2 = Up(self.init_channel*32, self.init_channel*16)
|
379 |
+
self.up2_2 = Up(self.init_channel*16, self.init_channel*8)
|
380 |
+
self.up3_2 = Up(self.init_channel*8, self.init_channel*4)
|
381 |
+
self.up4_2 = Up(self.init_channel*4,self.init_channel*2)
|
382 |
+
self.up5_2 = Up(self.init_channel*2, self.init_channel)
|
383 |
+
self.outc_2 = OutConv(self.init_channel, 1)
|
384 |
+
|
385 |
+
def forward(self, input):
|
386 |
+
x1 = self.inc(input)
|
387 |
+
x2 = self.down1(x1)
|
388 |
+
x3 = self.down2(x2)
|
389 |
+
x4 = self.down3(x3)
|
390 |
+
x5 = self.down4(x4)
|
391 |
+
x6 = self.down5(x5)
|
392 |
+
|
393 |
+
x_1 = self.up1_1(x6, x5)
|
394 |
+
x_1 = self.up2_1(x_1, x4)
|
395 |
+
x_1 = self.up3_1(x_1, x3)
|
396 |
+
x_1 = self.up4_1(x_1, x2)
|
397 |
+
x_1 = self.up5_1(x_1, x1)
|
398 |
+
logits_1 = self.outc_1(x_1)
|
399 |
+
|
400 |
+
x_2 = self.up1_2(x6, x5)
|
401 |
+
x_2 = self.up2_2(x_2, x4)
|
402 |
+
x_2 = self.up3_2(x_2, x3)
|
403 |
+
x_2 = self.up4_2(x_2, x2)
|
404 |
+
x_2 = self.up5_2(x_2, x1)
|
405 |
+
logits_2 = self.outc_2(x_2)
|
406 |
+
|
407 |
+
logits = torch.cat((logits_1,logits_2),1)
|
408 |
+
|
409 |
+
return logits
|
410 |
+
|
411 |
+
|
412 |
+
|
413 |
+
class BmpGenerator(nn.Module):
|
414 |
+
def __init__(self, input_nc, output_nc, num_downs, ngf=64, biline=True, norm_layer=nn.BatchNorm2d, use_dropout=False):
|
415 |
+
"""Construct a Unet generator
|
416 |
+
Parameters:
|
417 |
+
input_nc (int) -- the number of channels in input images
|
418 |
+
output_nc (int) -- the number of channels in output images
|
419 |
+
num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7,
|
420 |
+
image of size 128x128 will become of size 1x1 # at the bottleneck
|
421 |
+
ngf (int) -- the number of filters in the last conv layer
|
422 |
+
norm_layer -- normalization layer
|
423 |
+
|
424 |
+
We construct the U-Net from the innermost layer to the outermost layer.
|
425 |
+
It is a recursive process.
|
426 |
+
"""
|
427 |
+
super(BmpGenerator, self).__init__()
|
428 |
+
self.init_channel =32
|
429 |
+
self.output_nc = output_nc
|
430 |
+
self.inc = DoubleConv(input_nc,self.init_channel)
|
431 |
+
self.down1 = Down(self.init_channel, self.init_channel*2)
|
432 |
+
self.down2 = Down(self.init_channel*2, self.init_channel*4)
|
433 |
+
self.down3 = Down(self.init_channel*4, self.init_channel*8)
|
434 |
+
self.down4 = Down(self.init_channel*8, self.init_channel*16)
|
435 |
+
self.down5 = Down(self.init_channel*16, self.init_channel*32)
|
436 |
+
|
437 |
+
self.up1 = Up(self.init_channel*32, self.init_channel*16)
|
438 |
+
self.up2 = Up(self.init_channel*16, self.init_channel*8)
|
439 |
+
self.up3 = Up(self.init_channel*8, self.init_channel*4)
|
440 |
+
self.up4 = Up(self.init_channel*4,self.init_channel*2)
|
441 |
+
self.up5 = Up(self.init_channel*2, self.init_channel)
|
442 |
+
self.outc = OutConv(self.init_channel, self.output_nc)
|
443 |
+
# self.dropout = nn.Dropout(p=0.5)
|
444 |
+
def forward(self, input):
|
445 |
+
x1 = self.inc(input)
|
446 |
+
x2 = self.down1(x1)
|
447 |
+
x3 = self.down2(x2)
|
448 |
+
x4 = self.down3(x3)
|
449 |
+
x5 = self.down4(x4)
|
450 |
+
x6 = self.down5(x5)
|
451 |
+
|
452 |
+
x = self.up1(x6, x5)
|
453 |
+
# x = self.dropout(x)
|
454 |
+
x = self.up2(x, x4)
|
455 |
+
# x = self.dropout(x)
|
456 |
+
x = self.up3(x, x3)
|
457 |
+
# x = self.dropout(x)
|
458 |
+
x = self.up4(x, x2)
|
459 |
+
# x = self.dropout(x)
|
460 |
+
x = self.up5(x, x1)
|
461 |
+
# x = self.dropout(x)
|
462 |
+
logits1 = self.outc(x)
|
463 |
+
return logits1
|
464 |
+
class Bmp2Generator(nn.Module):
|
465 |
+
"""Create a Unet-based generator"""
|
466 |
+
|
467 |
+
def __init__(self, input_nc, output_nc, num_downs, ngf=64, biline=True, norm_layer=nn.BatchNorm2d, use_dropout=False):
|
468 |
+
"""Construct a Unet generator
|
469 |
+
Parameters:
|
470 |
+
input_nc (int) -- the number of channels in input images
|
471 |
+
output_nc (int) -- the number of channels in output images
|
472 |
+
num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7,
|
473 |
+
image of size 128x128 will become of size 1x1 # at the bottleneck
|
474 |
+
ngf (int) -- the number of filters in the last conv layer
|
475 |
+
norm_layer -- normalization layer
|
476 |
+
|
477 |
+
We construct the U-Net from the innermost layer to the outermost layer.
|
478 |
+
It is a recursive process.
|
479 |
+
"""
|
480 |
+
super(Bmp2Generator, self).__init__()
|
481 |
+
#gienet
|
482 |
+
self.init_channel =32
|
483 |
+
self.inc = DoubleConv(3,self.init_channel)
|
484 |
+
self.down1 = Down(self.init_channel, self.init_channel*2)
|
485 |
+
self.down2 = Down(self.init_channel*2, self.init_channel*4)
|
486 |
+
self.down3 = Down(self.init_channel*4, self.init_channel*8)
|
487 |
+
self.down4 = Down(self.init_channel*8, self.init_channel*16)
|
488 |
+
self.down5 = Down(self.init_channel*16, self.init_channel*32)
|
489 |
+
|
490 |
+
self.up1 = Up(self.init_channel*32, self.init_channel*16)
|
491 |
+
self.up2 = Up(self.init_channel*16, self.init_channel*8)
|
492 |
+
self.up3 = Up(self.init_channel*8, self.init_channel*4)
|
493 |
+
self.up4 = Up(self.init_channel*4,self.init_channel*2)
|
494 |
+
self.up5 = Up(self.init_channel*2, self.init_channel)
|
495 |
+
self.outc = OutConv(self.init_channel, 1)
|
496 |
+
self.up1_1 = Up_single(self.init_channel*32, self.init_channel*16)
|
497 |
+
self.up2_1 = Up_single(self.init_channel*16, self.init_channel*8)
|
498 |
+
self.up3_1 = Up_single(self.init_channel*8, self.init_channel*4)
|
499 |
+
self.up4_1 = Up_single(self.init_channel*4,self.init_channel*2)
|
500 |
+
self.up5_1 = Up_single(self.init_channel*2, self.init_channel)
|
501 |
+
self.outc_1 = OutConv(self.init_channel, 1)
|
502 |
+
self.outc_2 = OutConv(self.init_channel, 1)
|
503 |
+
|
504 |
+
#bpm net
|
505 |
+
self.inc_b = DoubleConv(4,self.init_channel)
|
506 |
+
self.down1_b = Down(self.init_channel, self.init_channel*2)
|
507 |
+
self.down2_b = Down(self.init_channel*2, self.init_channel*4)
|
508 |
+
self.down3_b = Down(self.init_channel*4, self.init_channel*8)
|
509 |
+
self.down4_b = Down(self.init_channel*8, self.init_channel*16)
|
510 |
+
self.down5_b = Down(self.init_channel*16, self.init_channel*32)
|
511 |
+
|
512 |
+
self.up1_b = Up(self.init_channel*32, self.init_channel*16)
|
513 |
+
self.up2_b = Up(self.init_channel*16, self.init_channel*8)
|
514 |
+
self.up3_b = Up(self.init_channel*8, self.init_channel*4)
|
515 |
+
self.up4_b = Up(self.init_channel*4,self.init_channel*2)
|
516 |
+
self.up5_b = Up(self.init_channel*2, self.init_channel)
|
517 |
+
self.outc_b = OutConv(self.init_channel, 2)
|
518 |
+
# self.dropout = nn.Dropout(p=0.5)
|
519 |
+
def forward(self, input):
|
520 |
+
#gienet
|
521 |
+
x1 = self.inc(input)
|
522 |
+
x2 = self.down1(x1)
|
523 |
+
x3 = self.down2(x2)
|
524 |
+
x4 = self.down3(x3)
|
525 |
+
x5 = self.down4(x4)
|
526 |
+
x6 = self.down5(x5)
|
527 |
+
|
528 |
+
x_1 = self.up1_1(x6, x5)
|
529 |
+
x_1 = self.up2_1(x_1, x4)
|
530 |
+
x_1 = self.up3_1(x_1, x3)
|
531 |
+
x_1 = self.up4_1(x_1, x2)
|
532 |
+
x_1 = self.up5_1(x_1, x1)
|
533 |
+
mask = self.outc_1(x_1)
|
534 |
+
edge = self.outc_2(x_1)
|
535 |
+
|
536 |
+
x = self.up1(x6, x5)
|
537 |
+
x = self.up2(x, x4)
|
538 |
+
x = self.up3(x, x3)
|
539 |
+
x = self.up4(x, x2)
|
540 |
+
x = self.up5(x, x1)
|
541 |
+
depth = self.outc(x)
|
542 |
+
|
543 |
+
#bmpnet
|
544 |
+
mask[mask>0.5]=1.
|
545 |
+
mask[mask<=0.5]=0.
|
546 |
+
image_cat_depth = torch.cat((input*mask,depth*mask),dim=1)
|
547 |
+
x1_b = self.inc_b(image_cat_depth)
|
548 |
+
x2_b = self.down1_b(x1_b)
|
549 |
+
x3_b = self.down2_b(x2_b)
|
550 |
+
x4_b = self.down3_b(x3_b)
|
551 |
+
x5_b = self.down4_b(x4_b)
|
552 |
+
x6_b = self.down5_b(x5_b)
|
553 |
+
x_b = self.up1_b(x6_b, x5_b)
|
554 |
+
x_b = self.up2_b(x_b, x4_b)
|
555 |
+
x_b = self.up3_b(x_b, x3_b)
|
556 |
+
x_b = self.up4_b(x_b, x2_b)
|
557 |
+
x_b = self.up5_b(x_b, x1_b)
|
558 |
+
bm = self.outc_b(x_b)
|
559 |
+
# return depth,mask,edge,bm
|
560 |
+
return bm
|
561 |
+
class UnetGenerator(nn.Module):
|
562 |
+
def __init__(self, input_nc, output_nc, num_downs, ngf=64,
|
563 |
+
norm_layer=nn.BatchNorm2d, use_dropout=False):
|
564 |
+
super(UnetGenerator, self).__init__()
|
565 |
+
|
566 |
+
# construct unet structure
|
567 |
+
unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True)
|
568 |
+
for i in range(num_downs - 5):
|
569 |
+
unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
|
570 |
+
unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
|
571 |
+
unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
|
572 |
+
unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
|
573 |
+
unet_block = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)
|
574 |
+
|
575 |
+
self.model = unet_block
|
576 |
+
|
577 |
+
def forward(self, input):
|
578 |
+
return self.model(input)
|
579 |
+
|
580 |
+
#class GieGenerator(nn.Module):
|
581 |
+
# def __init__(self, input_nc, output_nc, num_downs, ngf=64,
|
582 |
+
# norm_layer=nn.BatchNorm2d, use_dropout=False):
|
583 |
+
# super(GieGenerator, self).__init__()
|
584 |
+
#
|
585 |
+
# # construct unet structure
|
586 |
+
# unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True)
|
587 |
+
# for i in range(num_downs - 5):
|
588 |
+
# unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
|
589 |
+
# unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
|
590 |
+
# unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
|
591 |
+
# unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
|
592 |
+
# unet_block = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)
|
593 |
+
#
|
594 |
+
# self.model = unet_block
|
595 |
+
#
|
596 |
+
# def forward(self, input):
|
597 |
+
# return self.model(input)
|
598 |
+
|
599 |
+
# Defines the submodule with skip connection.
|
600 |
+
# X -------------------identity---------------------- X
|
601 |
+
# |-- downsampling -- |submodule| -- upsampling --|
|
602 |
+
class UnetSkipConnectionBlock(nn.Module):
|
603 |
+
def __init__(self, outer_nc, inner_nc, input_nc=None,
|
604 |
+
submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
|
605 |
+
super(UnetSkipConnectionBlock, self).__init__()
|
606 |
+
self.outermost = outermost
|
607 |
+
if type(norm_layer) == functools.partial:
|
608 |
+
use_bias = norm_layer.func == nn.InstanceNorm2d
|
609 |
+
else:
|
610 |
+
use_bias = norm_layer == nn.InstanceNorm2d
|
611 |
+
if input_nc is None:
|
612 |
+
input_nc = outer_nc
|
613 |
+
downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
|
614 |
+
stride=2, padding=1, bias=use_bias)
|
615 |
+
downrelu = nn.LeakyReLU(0.2, True)
|
616 |
+
downnorm = norm_layer(inner_nc)
|
617 |
+
uprelu = nn.ReLU(True)
|
618 |
+
upnorm = norm_layer(outer_nc)
|
619 |
+
|
620 |
+
if outermost:
|
621 |
+
upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
|
622 |
+
                                        kernel_size=4, stride=2,
                                        padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            # resize = nn.Upsample(scale_factor=2)
            # conv = nn.Conv2d(inner_nc,outer_nc,kernel_size=4,stride=2,padding=1,bias=use_bias)
            upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1, bias=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            # up = [uprelu, resize, conv, upnorm]
            model = down + up
        else:
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1, bias=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up

        self.model = nn.Sequential(*model)

    def forward(self, x):
        if self.outermost:
            return self.model(x)
        else:
            return torch.cat([x, self.model(x)], 1)


##===================================================================================================
class DilatedDoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=4, stride=1, dilation=4),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=4, stride=1, dilation=4),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class DilatedDown(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DilatedDoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)

class DilatedUp(nn.Module):
    """Upscaling then double conv"""
    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        self.up = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv = DilatedDoubleConv(in_channels, out_channels)

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=4, stride=1, dilation=4),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        # self.deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1, bias=True)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        x1 = self.conv1(x1)
        # x1 = self.deconv(x1)
        # input is BCHW
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)

class DilatedSingleUnet(nn.Module):
    def __init__(self, input_nc, output_nc, num_downs, ngf=64, biline=True, norm_layer=nn.BatchNorm2d, use_dropout=False):
        super(DilatedSingleUnet, self).__init__()
        self.init_channel = 32
        self.inc = DilatedDoubleConv(input_nc, self.init_channel)
        self.down1 = DilatedDown(self.init_channel, self.init_channel*2)
        self.down2 = DilatedDown(self.init_channel*2, self.init_channel*4)
        self.down3 = DilatedDown(self.init_channel*4, self.init_channel*8)
        self.down4 = DilatedDown(self.init_channel*8, self.init_channel*16)
        self.down5 = DilatedDown(self.init_channel*16, self.init_channel*32)
        self.cbam = CBAM(gate_channels=self.init_channel*32)

        self.up1 = DilatedUp(self.init_channel*32, self.init_channel*16)
        self.up2 = DilatedUp(self.init_channel*16, self.init_channel*8)
        self.up3 = DilatedUp(self.init_channel*8, self.init_channel*4)
        self.up4 = DilatedUp(self.init_channel*4, self.init_channel*2)
        self.up5 = DilatedUp(self.init_channel*2, self.init_channel)
        self.outc = OutConv(self.init_channel, output_nc)

    def forward(self, input):
        x1 = self.inc(input)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x6 = self.down5(x5)
        x6 = self.cbam(x6)
        x = self.up1(x6, x5)
        x = self.up2(x, x4)
        x = self.up3(x, x3)
        x = self.up4(x, x2)
        x = self.up5(x, x1)
        logits1 = self.outc(x)
        return logits1
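For orientation, here is a minimal sketch of how `DilatedSingleUnet` above might be exercised. It assumes `CBAM` and `OutConv` are importable from the same module (they are defined elsewhere in this repo); the import path, channel counts, and the 256x256 resolution are illustrative assumptions, not values fixed by the code:

```python
import torch
from data.MBD.model.gienet import DilatedSingleUnet  # hypothetical import path

net = DilatedSingleUnet(input_nc=3, output_nc=3, num_downs=6)
x = torch.randn(1, 3, 256, 256)   # illustrative input; five 2x poolings give an 8x8 bottleneck
y = net(x)                        # dilated convs preserve spatial size, so output matches input
print(y.shape)                    # expected: torch.Size([1, 3, 256, 256])
```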
data/MBD/model/unetnc.py
ADDED
@@ -0,0 +1,86 @@
import torch
import torch.nn as nn
from torch.nn import init
import functools

# Defines the Unet generator.
# |num_downs|: number of downsamplings in UNet. For example,
# if |num_downs| == 7, an image of size 128x128 becomes 1x1 at the bottleneck.
class UnetGenerator(nn.Module):
    def __init__(self, input_nc, output_nc, num_downs, ngf=64,
                 norm_layer=nn.BatchNorm2d, use_dropout=False):
        super(UnetGenerator, self).__init__()

        # construct unet structure from the innermost block outwards
        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True)
        for i in range(num_downs - 5):
            unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
        unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)

        self.model = unet_block

    def forward(self, input):
        return self.model(input)


# Defines the submodule with skip connection.
# X -------------------identity---------------------- X
#   |-- downsampling -- |submodule| -- upsampling --|
class UnetSkipConnectionBlock(nn.Module):
    def __init__(self, outer_nc, inner_nc, input_nc=None,
                 submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
        super(UnetSkipConnectionBlock, self).__init__()
        self.outermost = outermost
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d
        if input_nc is None:
            input_nc = outer_nc
        downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
                             stride=2, padding=1, bias=use_bias)
        downrelu = nn.LeakyReLU(0.2, True)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU(True)
        upnorm = norm_layer(outer_nc)

        if outermost:
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1, bias=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1, bias=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up

        self.model = nn.Sequential(*model)

    def forward(self, x):
        if self.outermost:
            return self.model(x)
        else:
            return torch.cat([x, self.model(x)], 1)
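A minimal usage sketch for the `UnetGenerator` above; the channel counts, `num_downs=7`, and the 128x128 input are illustrative assumptions (with seven downsamplings a 128x128 image reaches 1x1 at the bottleneck, as the comment in the file notes):

```python
import torch
import torch.nn as nn
from data.MBD.model.unetnc import UnetGenerator  # hypothetical import path

net = UnetGenerator(input_nc=3, output_nc=3, num_downs=7, ngf=64,
                    norm_layer=nn.BatchNorm2d, use_dropout=False)
x = torch.randn(2, 3, 128, 128)   # batch of 2 so BatchNorm at the 1x1 bottleneck has statistics
y = net(x)                        # outermost block ends with Tanh, so values lie in [-1, 1]
print(y.shape)                    # torch.Size([2, 3, 128, 128])
```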
data/MBD/modify_stn_model/stn_head.py
ADDED
@@ -0,0 +1,123 @@
from __future__ import absolute_import

import math
import numpy as np
import sys

import torch
from torch import nn
from torch.nn import functional as F
from torch.nn import init


def conv3x3_block(in_planes, out_planes, stride=1):
    """3x3 convolution with padding, followed by BN and ReLU"""
    conv_layer = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=1)

    block = nn.Sequential(
        conv_layer,
        nn.BatchNorm2d(out_planes),
        nn.ReLU(inplace=True),
    )
    return block


class STNHead(nn.Module):
    def __init__(self, in_planes, num_ctrlpoints, activation='none'):
        super(STNHead, self).__init__()

        self.in_planes = in_planes
        self.num_ctrlpoints = num_ctrlpoints
        self.activation = activation
        self.stn_convnet = nn.Sequential(
            conv3x3_block(in_planes, 32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(32, 64),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(64, 128),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(128, 256),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(256, 256),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(256, 256))  # for a 256x256 input this yields 256x8x8 features

        self.stn_fc1 = nn.Sequential(
            # nn.Linear(2*256, 512),
            nn.Linear(8*8*256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True))
        self.stn_fc2 = nn.Linear(512, num_ctrlpoints*2)

        self.init_weights(self.stn_convnet)
        self.init_weights(self.stn_fc1)
        self.init_stn(self.stn_fc2)

    def init_weights(self, module):
        for m in module.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.001)
                m.bias.data.zero_()

    def init_stn(self, stn_fc2):
        # initialise the last fc layer so that it predicts control points spread
        # along the four sides of a rectangle inset by the margins below
        margin_x, margin_y = 0.35, 0.35
        num_ctrl_pts_per_side = (self.num_ctrlpoints - 4) // 4 + 2
        ctrl_pts_x = np.linspace(margin_x, 1.0 - margin_x, num_ctrl_pts_per_side)
        ctrl_pts_y_top = np.ones(num_ctrl_pts_per_side) * margin_y
        ctrl_pts_y_bottom = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_y)
        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)

        ctrl_pts_x_left = np.ones(num_ctrl_pts_per_side) * margin_x
        ctrl_pts_x_right = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_x)
        ctrl_pts_left = np.stack([ctrl_pts_x_left[1:-1], ctrl_pts_x[1:-1]], axis=1)
        ctrl_pts_right = np.stack([ctrl_pts_x_right[1:-1], ctrl_pts_x[1:-1]], axis=1)

        ctrl_points = np.concatenate([ctrl_pts_top, ctrl_pts_bottom, ctrl_pts_left, ctrl_pts_right], axis=0).astype(np.float32)

        if self.activation == 'none':
            pass
        elif self.activation == 'sigmoid':
            ctrl_points = -np.log(1. / ctrl_points - 1.)
        stn_fc2.weight.data.zero_()
        stn_fc2.bias.data = torch.Tensor(ctrl_points).view(-1)

    def forward(self, x):
        x = self.stn_convnet(x)
        batch_size, _, h, w = x.size()
        x = x.view(batch_size, -1)
        img_feat = self.stn_fc1(x)
        x = self.stn_fc2(0.1 * img_feat)
        if self.activation == 'sigmoid':
            x = F.sigmoid(x)
        x = x.view(-1, self.num_ctrlpoints, 2)
        return img_feat, x


if __name__ == "__main__":
    in_planes = 3
    num_ctrlpoints = 20
    activation = 'none'  # 'sigmoid'
    stn_head = STNHead(in_planes, num_ctrlpoints, activation)
    # the fully connected head expects a 256x256 input (8x8x256 conv features)
    input = torch.randn(10, 3, 256, 256)
    img_feat, control_points = stn_head(input)  # forward returns (features, control points)
    print(control_points.size())
data/MBD/modify_stn_model/tps_spatial_transformer.py
ADDED
@@ -0,0 +1,194 @@
from __future__ import absolute_import

import numpy as np
import itertools

import torch
import torch.nn as nn
import torch.nn.functional as F

def grid_sample(input, grid, canvas=None):
    output = F.grid_sample(input, grid)
    if canvas is None:
        return output
    else:
        input_mask = input.data.new(input.size()).fill_(1)
        output_mask = F.grid_sample(input_mask, grid)
        padded_output = output * output_mask + canvas * (1 - output_mask)
        return padded_output


# phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2
def compute_partial_repr(input_points, control_points):
    N = input_points.size(0)
    M = control_points.size(0)
    pairwise_diff = input_points.view(N, 1, 2) - control_points.view(1, M, 2)
    # original implementation, very slow
    # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance
    pairwise_diff_square = pairwise_diff * pairwise_diff
    pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, 1]
    repr_matrix = 0.5 * pairwise_dist * torch.log(pairwise_dist)
    # fix numerical error for 0 * log(0), substitute all nan with 0
    mask = repr_matrix != repr_matrix
    repr_matrix.masked_fill_(mask, 0)
    return repr_matrix


# output_ctrl_pts are specified, according to our task: the four corners plus the
# quarter, half and three-quarter points of every border of the unit square.
def build_output_control_points(num_control_points, margins):
    points = [0.25, 0.5, 0.75]
    pts2 = [[0, 0], [1, 0], [0, 1], [1, 1]]
    for ratio in points:
        pts2.append([1*ratio, 0])
    for ratio in points:
        pts2.append([1*ratio, 1])
    for ratio in points:
        pts2.append([0, 1*ratio])
    for ratio in points:
        pts2.append([1, 1*ratio])
    pts2 = np.float32(pts2)

    margin_x, margin_y = margins
    num_ctrl_pts_per_side = (num_control_points - 4) // 4 + 2
    ctrl_pts_x = np.linspace(margin_x, 1.0 - margin_x, num_ctrl_pts_per_side)
    ctrl_pts_y_top = np.ones(num_ctrl_pts_per_side) * margin_y
    ctrl_pts_y_bottom = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_y)
    ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
    ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)

    ctrl_pts_x_left = np.ones(num_ctrl_pts_per_side) * margin_x
    ctrl_pts_x_right = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_x)
    ctrl_pts_left = np.stack([ctrl_pts_x_left[1:-1], ctrl_pts_x[1:-1]], axis=1)
    ctrl_pts_right = np.stack([ctrl_pts_x_right[1:-1], ctrl_pts_x[1:-1]], axis=1)

    output_ctrl_pts_arr = np.concatenate([ctrl_pts_top, ctrl_pts_bottom, ctrl_pts_left, ctrl_pts_right], axis=0)
    # the margin-based layout above is overridden by the fixed border layout pts2
    output_ctrl_pts_arr = pts2
    output_ctrl_pts = torch.FloatTensor(output_ctrl_pts_arr)
    return output_ctrl_pts


# demo: ~/test/models/test_tps_transformation.py
class TPSSpatialTransformer(nn.Module):

    def __init__(self, output_image_size=None, num_control_points=None, margins=None):
        super(TPSSpatialTransformer, self).__init__()
        self.output_image_size = output_image_size
        self.num_control_points = num_control_points
        self.margins = margins

        self.target_height, self.target_width = output_image_size
        target_control_points = build_output_control_points(num_control_points, margins)
        N = num_control_points

        # create padded kernel matrix
        forward_kernel = torch.zeros(N + 3, N + 3)
        target_control_partial_repr = compute_partial_repr(target_control_points, target_control_points)
        forward_kernel[:N, :N].copy_(target_control_partial_repr)
        forward_kernel[:N, -3].fill_(1)
        forward_kernel[-3, :N].fill_(1)
        forward_kernel[:N, -2:].copy_(target_control_points)
        forward_kernel[-2:, :N].copy_(target_control_points.transpose(0, 1))
        # compute inverse matrix
        inverse_kernel = torch.inverse(forward_kernel)

        # create target coordinate matrix
        HW = self.target_height * self.target_width
        target_coordinate = list(itertools.product(range(self.target_height), range(self.target_width)))
        target_coordinate = torch.Tensor(target_coordinate)  # HW x 2
        Y, X = target_coordinate.split(1, dim=1)
        Y = Y / (self.target_height - 1)
        X = X / (self.target_width - 1)
        target_coordinate = torch.cat([X, Y], dim=1)  # convert from (y, x) to (x, y)
        target_coordinate_partial_repr = compute_partial_repr(target_coordinate, target_control_points)
        target_coordinate_repr = torch.cat([
            target_coordinate_partial_repr, torch.ones(HW, 1), target_coordinate
        ], dim=1)

        # register precomputed matrices
        self.register_buffer('inverse_kernel', inverse_kernel)
        self.register_buffer('padding_matrix', torch.zeros(3, 2))
        self.register_buffer('target_coordinate_repr', target_coordinate_repr)
        self.register_buffer('target_control_points', target_control_points)

    def forward(self, input, source_control_points, direction='dewarp'):
        if direction == 'dewarp':
            assert source_control_points.ndimension() == 3
            assert source_control_points.size(1) == self.num_control_points
            assert source_control_points.size(2) == 2
            batch_size = source_control_points.size(0)

            Y = torch.cat([source_control_points, self.padding_matrix.expand(batch_size, 3, 2)], 1)
            mapping_matrix = torch.matmul(self.inverse_kernel, Y)
            source_coordinate = torch.matmul(self.target_coordinate_repr, mapping_matrix)

            grid = source_coordinate.view(-1, self.target_height, self.target_width, 2)
            grid = torch.clamp(grid, 0, 1)  # the source_control_points may be out of [0, 1]
            # the input to grid_sample is normalized to [-1, 1], but what we have is [0, 1]
            grid = 2.0 * grid - 1.0
            output = grid_sample(input, grid, canvas=None)
            return output, grid
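To see how the two modules in this folder are meant to fit together, here is a minimal sketch; the import paths, 256x256 resolution, batch size, and control-point count are assumptions, not values fixed by the repo. `STNHead` predicts the control points, which then drive the TPS warp:

```python
import torch
from data.MBD.modify_stn_model.stn_head import STNHead                        # hypothetical paths
from data.MBD.modify_stn_model.tps_spatial_transformer import TPSSpatialTransformer

num_ctrl = 16                          # (num_ctrl - 4) must be divisible by 4 for the 4-side layout
head = STNHead(in_planes=3, num_ctrlpoints=num_ctrl, activation='sigmoid')
tps = TPSSpatialTransformer(output_image_size=(256, 256),
                            num_control_points=num_ctrl,
                            margins=(0.35, 0.35))

x = torch.randn(2, 3, 256, 256)        # illustrative batch of distorted crops
img_feat, ctrl_pts = head(x)           # ctrl_pts: (B, num_ctrl, 2), in [0, 1] with sigmoid activation
dewarped, grid = tps(x, ctrl_pts)      # direction defaults to 'dewarp'
print(dewarped.shape)                  # torch.Size([2, 3, 256, 256])
```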
data/MBD/stn_model/stn_head.py
ADDED
@@ -0,0 +1,123 @@
from __future__ import absolute_import

import math
import numpy as np
import sys

import torch
from torch import nn
from torch.nn import functional as F
from torch.nn import init


def conv3x3_block(in_planes, out_planes, stride=1):
    """3x3 convolution with padding, followed by BN and ReLU"""
    conv_layer = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=1)

    block = nn.Sequential(
        conv_layer,
        nn.BatchNorm2d(out_planes),
        nn.ReLU(inplace=True),
    )
    return block


class STNHead(nn.Module):
    def __init__(self, in_planes, num_ctrlpoints, activation='none'):
        super(STNHead, self).__init__()

        self.in_planes = in_planes
        self.num_ctrlpoints = num_ctrlpoints
        self.activation = activation
        self.stn_convnet = nn.Sequential(
            conv3x3_block(in_planes, 32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(32, 64),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(64, 128),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(128, 256),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(256, 256),
            nn.MaxPool2d(kernel_size=2, stride=2),
            conv3x3_block(256, 256))  # for a 256x256 input this yields 256x8x8 features

        self.stn_fc1 = nn.Sequential(
            # nn.Linear(2*256, 512),
            nn.Linear(8*8*256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True))
        self.stn_fc2 = nn.Linear(512, num_ctrlpoints*2)

        self.init_weights(self.stn_convnet)
        self.init_weights(self.stn_fc1)
        self.init_stn(self.stn_fc2)

    def init_weights(self, module):
        for m in module.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.001)
                m.bias.data.zero_()

    def init_stn(self, stn_fc2):
        # initialise the last fc layer so that it predicts control points spread
        # along the four sides of a rectangle inset by the margins below
        margin_x, margin_y = 0.35, 0.35
        num_ctrl_pts_per_side = (self.num_ctrlpoints - 4) // 4 + 2
        ctrl_pts_x = np.linspace(margin_x, 1.0 - margin_x, num_ctrl_pts_per_side)
        ctrl_pts_y_top = np.ones(num_ctrl_pts_per_side) * margin_y
        ctrl_pts_y_bottom = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_y)
        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)

        ctrl_pts_x_left = np.ones(num_ctrl_pts_per_side) * margin_x
        ctrl_pts_x_right = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_x)
        ctrl_pts_left = np.stack([ctrl_pts_x_left[1:-1], ctrl_pts_x[1:-1]], axis=1)
        ctrl_pts_right = np.stack([ctrl_pts_x_right[1:-1], ctrl_pts_x[1:-1]], axis=1)

        ctrl_points = np.concatenate([ctrl_pts_top, ctrl_pts_bottom, ctrl_pts_left, ctrl_pts_right], axis=0).astype(np.float32)

        if self.activation == 'none':
            pass
        elif self.activation == 'sigmoid':
            ctrl_points = -np.log(1. / ctrl_points - 1.)
        stn_fc2.weight.data.zero_()
        stn_fc2.bias.data = torch.Tensor(ctrl_points).view(-1)

    def forward(self, x):
        x = self.stn_convnet(x)
        batch_size, _, h, w = x.size()
        x = x.view(batch_size, -1)
        img_feat = self.stn_fc1(x)
        x = self.stn_fc2(0.1 * img_feat)
        if self.activation == 'sigmoid':
            x = F.sigmoid(x)
        x = x.view(-1, self.num_ctrlpoints, 2)
        return img_feat, x


if __name__ == "__main__":
    in_planes = 3
    num_ctrlpoints = 20
    activation = 'none'  # 'sigmoid'
    stn_head = STNHead(in_planes, num_ctrlpoints, activation)
    # the fully connected head expects a 256x256 input (8x8x256 conv features)
    input = torch.randn(10, 3, 256, 256)
    img_feat, control_points = stn_head(input)  # forward returns (features, control points)
    print(control_points.size())
data/MBD/stn_model/tps_spatial_transformer.py
ADDED
@@ -0,0 +1,155 @@
from __future__ import absolute_import

import numpy as np
import itertools

import torch
import torch.nn as nn
import torch.nn.functional as F

def grid_sample(input, grid, canvas=None):
    output = F.grid_sample(input, grid)
    if canvas is None:
        return output
    else:
        input_mask = input.data.new(input.size()).fill_(1)
        output_mask = F.grid_sample(input_mask, grid)
        padded_output = output * output_mask + canvas * (1 - output_mask)
        return padded_output


# phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2
def compute_partial_repr(input_points, control_points):
    N = input_points.size(0)
    M = control_points.size(0)
    pairwise_diff = input_points.view(N, 1, 2) - control_points.view(1, M, 2)
    # original implementation, very slow
    # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance
    pairwise_diff_square = pairwise_diff * pairwise_diff
    pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, 1]
    repr_matrix = 0.5 * pairwise_dist * torch.log(pairwise_dist)
    # fix numerical error for 0 * log(0), substitute all nan with 0
    mask = repr_matrix != repr_matrix
    repr_matrix.masked_fill_(mask, 0)
    return repr_matrix


# output_ctrl_pts are specified, according to our task: points spread along the
# four borders of the unit square, inset by the given margins.
def build_output_control_points(num_control_points, margins):
    margin_x, margin_y = margins
    num_ctrl_pts_per_side = (num_control_points - 4) // 4 + 2
    ctrl_pts_x = np.linspace(margin_x, 1.0 - margin_x, num_ctrl_pts_per_side)
    ctrl_pts_y_top = np.ones(num_ctrl_pts_per_side) * margin_y
    ctrl_pts_y_bottom = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_y)
    ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
    ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)

    ctrl_pts_x_left = np.ones(num_ctrl_pts_per_side) * margin_x
    ctrl_pts_x_right = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_x)
    ctrl_pts_left = np.stack([ctrl_pts_x_left[1:-1], ctrl_pts_x[1:-1]], axis=1)
    ctrl_pts_right = np.stack([ctrl_pts_x_right[1:-1], ctrl_pts_x[1:-1]], axis=1)

    output_ctrl_pts_arr = np.concatenate([ctrl_pts_top, ctrl_pts_bottom, ctrl_pts_left, ctrl_pts_right], axis=0)
    output_ctrl_pts = torch.Tensor(output_ctrl_pts_arr)
    return output_ctrl_pts

# demo: ~/test/models/test_tps_transformation.py
class TPSSpatialTransformer(nn.Module):

    def __init__(self, output_image_size=None, num_control_points=None, margins=None):
        super(TPSSpatialTransformer, self).__init__()
        self.output_image_size = output_image_size
        self.num_control_points = num_control_points
        self.margins = margins

        self.target_height, self.target_width = output_image_size
        target_control_points = build_output_control_points(num_control_points, margins)
        N = num_control_points

        # create padded kernel matrix
        forward_kernel = torch.zeros(N + 3, N + 3)
        target_control_partial_repr = compute_partial_repr(target_control_points, target_control_points)
        forward_kernel[:N, :N].copy_(target_control_partial_repr)
        forward_kernel[:N, -3].fill_(1)
        forward_kernel[-3, :N].fill_(1)
        forward_kernel[:N, -2:].copy_(target_control_points)
        forward_kernel[-2:, :N].copy_(target_control_points.transpose(0, 1))
        # compute inverse matrix
        inverse_kernel = torch.inverse(forward_kernel)

        # create target coordinate matrix
        HW = self.target_height * self.target_width
        target_coordinate = list(itertools.product(range(self.target_height), range(self.target_width)))
        target_coordinate = torch.Tensor(target_coordinate)  # HW x 2
        Y, X = target_coordinate.split(1, dim=1)
        Y = Y / (self.target_height - 1)
        X = X / (self.target_width - 1)
        target_coordinate = torch.cat([X, Y], dim=1)  # convert from (y, x) to (x, y)
        target_coordinate_partial_repr = compute_partial_repr(target_coordinate, target_control_points)
        target_coordinate_repr = torch.cat([
            target_coordinate_partial_repr, torch.ones(HW, 1), target_coordinate
        ], dim=1)

        # register precomputed matrices
        self.register_buffer('inverse_kernel', inverse_kernel)
        self.register_buffer('padding_matrix', torch.zeros(3, 2))
        self.register_buffer('target_coordinate_repr', target_coordinate_repr)
        self.register_buffer('target_control_points', target_control_points)

    def forward(self, input, source_control_points, direction='dewarp'):
        if direction == 'dewarp':
            assert source_control_points.ndimension() == 3
            assert source_control_points.size(1) == self.num_control_points
            assert source_control_points.size(2) == 2
            batch_size = source_control_points.size(0)

            Y = torch.cat([source_control_points, self.padding_matrix.expand(batch_size, 3, 2)], 1)
            mapping_matrix = torch.matmul(self.inverse_kernel, Y)
            source_coordinate = torch.matmul(self.target_coordinate_repr, mapping_matrix)

            grid = source_coordinate.view(-1, self.target_height, self.target_width, 2)
            grid = torch.clamp(grid, 0, 1)  # the source_control_points may be out of [0, 1]
            # the input to grid_sample is normalized to [-1, 1], but what we have is [0, 1]
            grid = 2.0 * grid - 1.0
            output_maps = grid_sample(input, grid, canvas=None)
            return output_maps, source_coordinate
data/MBD/tps_grid_gen.py
ADDED
@@ -0,0 +1,70 @@
# encoding: utf-8

import torch
import itertools
import torch.nn as nn
from torch.autograd import Function, Variable

class TPSGridGen(nn.Module):

    def __init__(self, target_height, target_width, target_control_points):
        super(TPSGridGen, self).__init__()
        assert target_control_points.ndimension() == 2
        assert target_control_points.size(1) == 2
        N = target_control_points.size(0)
        self.num_points = N
        target_control_points = target_control_points.float()

        # create padded kernel matrix
        forward_kernel = torch.zeros(N + 3, N + 3)
        target_control_partial_repr = self.compute_partial_repr(target_control_points, target_control_points)
        forward_kernel[:N, :N].copy_(target_control_partial_repr)
        forward_kernel[:N, -3].fill_(1)
        forward_kernel[-3, :N].fill_(1)
        forward_kernel[:N, -2:].copy_(target_control_points)
        forward_kernel[-2:, :N].copy_(target_control_points.transpose(0, 1))
        # compute inverse matrix
        inverse_kernel = torch.inverse(forward_kernel)

        # create target coordinate matrix
        HW = target_height * target_width
        target_coordinate = list(itertools.product(range(target_height), range(target_width)))
        target_coordinate = torch.Tensor(target_coordinate)  # HW x 2
        Y, X = target_coordinate.split(1, dim=1)
        Y = Y * 2 / (target_height - 1) - 1
        X = X * 2 / (target_width - 1) - 1
        target_coordinate = torch.cat([X, Y], dim=1)  # convert from (y, x) to (x, y)
        target_coordinate_partial_repr = self.compute_partial_repr(target_coordinate, target_control_points)
        target_coordinate_repr = torch.cat([
            target_coordinate_partial_repr, torch.ones(HW, 1), target_coordinate
        ], dim=1)

        # register precomputed matrices
        self.register_buffer('inverse_kernel', inverse_kernel)
        self.register_buffer('padding_matrix', torch.zeros(3, 2))
        self.register_buffer('target_coordinate_repr', target_coordinate_repr)

    def forward(self, source_control_points):
        assert source_control_points.ndimension() == 3
        assert source_control_points.size(1) == self.num_points
        assert source_control_points.size(2) == 2
        batch_size = source_control_points.size(0)

        Y = torch.cat([source_control_points, Variable(self.padding_matrix.expand(batch_size, 3, 2))], 1)
        mapping_matrix = torch.matmul(Variable(self.inverse_kernel), Y)
        source_coordinate = torch.matmul(Variable(self.target_coordinate_repr), mapping_matrix)
        return source_coordinate

    # phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2
    def compute_partial_repr(self, input_points, control_points):
        N = input_points.size(0)
        M = control_points.size(0)
        pairwise_diff = input_points.view(N, 1, 2) - control_points.view(1, M, 2)
        # original implementation, very slow
        # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance
        pairwise_diff_square = pairwise_diff * pairwise_diff
        pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, 1]
        repr_matrix = 0.5 * pairwise_dist * torch.log(pairwise_dist)
        # fix numerical error for 0 * log(0), substitute all nan with 0
        mask = repr_matrix != repr_matrix
        repr_matrix.masked_fill_(mask, 0)
        return repr_matrix
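A minimal sketch of driving `TPSGridGen` directly; here the target control points are the corners and edge midpoints of the [-1, 1] square, and all sizes and perturbations are illustrative assumptions:

```python
import torch
import torch.nn.functional as F
from data.MBD.tps_grid_gen import TPSGridGen  # hypothetical import path

# eight target control points in [-1, 1]: corners plus edge midpoints (illustrative)
target_pts = torch.tensor([[-1., -1.], [0., -1.], [1., -1.],
                           [-1.,  0.],            [1.,  0.],
                           [-1.,  1.], [0.,  1.], [1.,  1.]])
gridgen = TPSGridGen(target_height=64, target_width=64, target_control_points=target_pts)

# slightly perturbed source control points, one set per batch element
source_pts = (target_pts + 0.05 * torch.randn_like(target_pts)).unsqueeze(0)
coords = gridgen(source_pts)                       # (1, 64*64, 2) sampling coordinates
grid = coords.view(1, 64, 64, 2)
warped = F.grid_sample(torch.randn(1, 3, 64, 64), grid)
print(warped.shape)                                # torch.Size([1, 3, 64, 64])
```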
data/MBD/utils.py
ADDED
@@ -0,0 +1,234 @@
'''
Misc Utility functions
'''
from collections import OrderedDict
import os
import random

import matplotlib.pyplot as plt  # required by the plotting helpers below
import numpy as np
import torch
import torchvision

def recursive_glob(rootdir='.', suffix=''):
    """Performs recursive glob with given suffix and rootdir
        :param rootdir is the root directory
        :param suffix is the suffix to be searched
    """
    return [os.path.join(looproot, filename)
            for looproot, _, filenames in os.walk(rootdir)
            for filename in filenames if filename.endswith(suffix)]

def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter=1, max_iter=30000, power=0.9):
    """Polynomial decay of learning rate
        :param init_lr is base learning rate
        :param iter is a current iteration
        :param lr_decay_iter how frequently decay occurs, default is 1
        :param max_iter is number of maximum iterations
        :param power is a polynomial power
    """
    if iter % lr_decay_iter or iter > max_iter:
        return optimizer

    for param_group in optimizer.param_groups:
        param_group['lr'] = init_lr*(1 - iter/max_iter)**power


def adjust_learning_rate(optimizer, init_lr, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = init_lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def alpha_blend(input_image, segmentation_mask, alpha=0.5):
    """Alpha blending utility to overlay RGB masks on RGB images
        :param input_image is a np.ndarray with 3 channels
        :param segmentation_mask is a np.ndarray with 3 channels
        :param alpha is a float value
    """
    blended = np.zeros(input_image.size, dtype=np.float32)
    blended = input_image * alpha + segmentation_mask * (1 - alpha)
    return blended

def convert_state_dict(state_dict):
    """Converts a state dict saved from a DataParallel module to a normal
       module state_dict inplace
       :param state_dict is the loaded DataParallel model_state
    """
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v
    return new_state_dict


class ImagePool():
    def __init__(self, pool_size):
        self.pool_size = pool_size
        if self.pool_size > 0:
            self.num_imgs = 0
            self.images = []

    def query(self, images):
        if self.pool_size == 0:
            return images
        return_images = []
        for image in images:
            image = torch.unsqueeze(image.data, 0)
            if self.num_imgs < self.pool_size:
                self.num_imgs = self.num_imgs + 1
                self.images.append(image)
                return_images.append(image)
            else:
                p = random.uniform(0, 1)
                if p > 0.5:
                    random_id = random.randint(0, self.pool_size - 1)  # randint is inclusive
                    tmp = self.images[random_id].clone()
                    self.images[random_id] = image
                    return_images.append(tmp)
                else:
                    return_images.append(image)
        return_images = torch.cat(return_images, 0)
        return return_images


def set_requires_grad(nets, requires_grad=False):
    if not isinstance(nets, list):
        nets = [nets]
    for net in nets:
        if net is not None:
            for param in net.parameters():
                param.requires_grad = requires_grad


def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return float(param_group['lr'])

def visualize(epoch, model, layer):
    # get conv layers
    conv_layers = []
    for m in model.modules():
        if isinstance(m, torch.nn.modules.conv.Conv2d):
            conv_layers.append(m)

    tensor = conv_layers[layer].weight.data.cpu()
    vistensor(tensor, epoch, ch=0, allkernels=False, nrow=8, padding=1)


def vistensor(tensor, epoch, ch=0, allkernels=False, nrow=8, padding=1):
    '''
    vistensor: visualization of a weight tensor
        @ch: visualization channel
        @allkernels: visualize all tensors
        https://github.com/pedrodiamel/pytorchvision/blob/a14672fe4b07995e99f8af755de875daf8aababb/pytvision/visualization.py#L325
    '''
    n, c, w, h = tensor.shape
    if allkernels: tensor = tensor.view(n*c, -1, w, h)
    elif c != 3: tensor = tensor[:, ch, :, :].unsqueeze(dim=1)

    rows = np.min((tensor.shape[0]//nrow + 1, 64))
    grid = torchvision.utils.make_grid(tensor, nrow=8, normalize=True, padding=padding)
    plt.figure(figsize=(10, 10), dpi=200)
    plt.imshow(grid.numpy().transpose((1, 2, 0)))
    plt.savefig('./generated/filters_layer1_dwuv_'+str(epoch)+'.png')
    plt.close()


def show_uloss(uwpred, uworg, inp_img, samples=7):

    n, c, h, w = inp_img.shape
    uwpred = uwpred.detach().cpu().numpy()
    uworg = uworg.detach().cpu().numpy()
    inp_img = inp_img.detach().cpu().numpy()

    # NCHW -> NHWC
    uwpred = uwpred.transpose((0, 2, 3, 1))
    uworg = uworg.transpose((0, 2, 3, 1))

    choices = random.sample(range(n), min(n, samples))
    f, axarr = plt.subplots(samples, 3)
    for j in range(samples):
        img = inp_img[j].transpose(1, 2, 0)
        axarr[j][0].imshow(img[:, :, ::-1])
        axarr[j][1].imshow(uworg[j])
        axarr[j][2].imshow(uwpred[j])

    plt.savefig('./generated/unwarp.png')
    plt.close()


def show_uloss_visdom(vis, uwpred, uworg, labels_win, out_win, labelopts, outopts, args):
    samples = 7
    n, c, h, w = uwpred.shape
    uwpred = uwpred.detach().cpu().numpy()
    uworg = uworg.detach().cpu().numpy()
    out_arr = np.full((samples, 3, args.img_rows, args.img_cols), 0.0)
    label_arr = np.full((samples, 3, args.img_rows, args.img_cols), 0.0)
    choices = random.sample(range(n), min(n, samples))
    idx = 0
    for c in choices:
        out_arr[idx, :, :, :] = uwpred[c]
        label_arr[idx, :, :, :] = uworg[c]
        idx += 1

    vis.images(out_arr,
               win=out_win,
               opts=outopts)
    vis.images(label_arr,
               win=labels_win,
               opts=labelopts)

def show_unwarp_tnsboard(global_step, writer, uwpred, uworg, grid_samples, gt_tag, pred_tag):
    # sample indices from the prediction batch
    idxs = torch.LongTensor(random.sample(range(uwpred.shape[0]), min(grid_samples, uwpred.shape[0])))
    grid_uworg = torchvision.utils.make_grid(uworg[idxs], normalize=True, scale_each=True)
    writer.add_image(gt_tag, grid_uworg, global_step)
    grid_uwpr = torchvision.utils.make_grid(uwpred[idxs], normalize=True, scale_each=True)
    writer.add_image(pred_tag, grid_uwpr, global_step)

def show_wc_tnsboard(global_step, writer, images, labels, pred, grid_samples, inp_tag, gt_tag, pred_tag):
    idxs = torch.LongTensor(random.sample(range(images.shape[0]), min(grid_samples, images.shape[0])))
    grid_inp = torchvision.utils.make_grid(images[idxs], normalize=True, scale_each=True)
    writer.add_image(inp_tag, grid_inp, global_step)
    grid_lbl = torchvision.utils.make_grid(labels[idxs], normalize=True, scale_each=True)
    writer.add_image(gt_tag, grid_lbl, global_step)
    grid_pred = torchvision.utils.make_grid(pred[idxs], normalize=True, scale_each=True)
    writer.add_image(pred_tag, grid_pred, global_step)

def torch2cvimg(tensor, min=0, max=1):
    '''
    input:
        tensor -> torch.tensor BxCxHxW, C can be 1 or 3
    return
        im -> list of ndarray uint8 HxWxC
    '''
    im_list = []
    for i in range(tensor.shape[0]):
        im = tensor.detach().cpu().data.numpy()[i]
        im = im.transpose(1, 2, 0)
        im = np.clip(im, min, max)
        im = ((im-min)/(max-min)*255).astype(np.uint8)
        im_list.append(im)
    return im_list

def cvimg2torch(img, min=0, max=1):
    '''
    input:
        im -> ndarray uint8 HxWxC
    return
        tensor -> torch.tensor BxCxHxW
    '''
    img = img.astype(float) / 255.0
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()
    return img
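As a quick illustration of the two conversion helpers at the end of this file (the array content is synthetic):

```python
import numpy as np
import torch
from data.MBD.utils import cvimg2torch, torch2cvimg  # hypothetical import path

bgr = (np.random.rand(64, 64, 3) * 255).astype(np.uint8)   # fake HxWxC uint8 image
tensor = cvimg2torch(bgr)                                   # 1x3x64x64 float tensor in [0, 1]
restored = torch2cvimg(tensor)[0]                           # back to HxWxC uint8
print(tensor.shape, restored.shape, restored.dtype)
```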
data/README.md
ADDED
@@ -0,0 +1,135 @@
# Dataset Preparation
The data file tree should look like:
```
data/
    eval/
        dir300/
            1_in.png
            1_gt.png
            ...
        kligler/
        jung/
        osr/
        realdae/
        docunet_docaligner/
        dibco18/
    train/
        dewarping/
            doc3d/
        deshadowing/
            fsdsrd/
            rdd/
        appearance/
            clean_pdfs/
            realdae/
        deblurring/
            tdd/
        binarization/
            bickly/
            dibco/
            noise_office/
            phibd/
            msi/
```

## Evaluation Dataset
You can find the download links for the datasets we used for evaluation (Tables 1 and 2) in [this](https://github.com/ZZZHANG-jx/Recommendations-Document-Image-Processing/tree/master) repository, including DIR300 (300 samples), Kligler (300 samples), Jung (87 samples), OSR (237 samples), RealDAE (150 samples), DocUNet_DocAligner (150 samples), TDD (16000 samples) and DIBCO18 (10 samples). After downloading, add the suffixes `_in` and `_gt` to the input image and the ground-truth image respectively, and place them in the folder of the corresponding dataset.


## Training Dataset
You can find the download links for the datasets we used for training in [this](https://github.com/ZZZHANG-jx/Recommendations-Document-Image-Processing/tree/master) repository.
### Dewarping
- Doc3D
    - Mask extraction: you should extract the mask for each image from the UV data in Doc3D
    - Background preparation: you can download the background data from [here](https://www.robots.ox.ac.uk/~vgg/data/dtd/) and specify it for self.background_paths in `loaders/docres_loader.py`
    - JSON preparation:
```
[
    ## you need to specify the paths of 'in_path', 'mask_path' and 'gt_path':
    {
        "in_path": "dewarping/doc3d/img/1/102_1-pp_Page_048-xov0001.png",
        "mask_path": "dewarping/doc3d/mask/1/102_1-pp_Page_048-xov0001.png",
        "gt_path": "dewarping/doc3d/bm/1/102_1-pp_Page_048-xov0001.npy"
    }
]
```
### Deshadowing
- RDD
- FSDSRD
- JSON preparation
```
[   ## you need to specify the paths of 'in_path' and 'gt_path', for example:
    {
        "in_path": "deshadowing/fsdsrd/im/00004.png",
        "gt_path": "deshadowing/fsdsrd/gt/00004.png"
    },
    {
        "in_path": "deshadowing/rdd/im/00004.png",
        "gt_path": "deshadowing/rdd/gt/00004.png"
    }
]
```
### Appearance enhancement
- Doc3DShade
    - Clean PDF collection: you should collect PDF files from the internet and convert them to images to serve as the source for synthesis.
    - Shadow extraction: extract shadows from Doc3DShade by using `data/preprocess/shadow_extraction.py` and dewarp the obtained shadows by using `data/MBD/infer.py`. Then you should specify self.shadow_paths in `loaders/docres_loader.py`
- RealDAE
- JSON preparation:
```
[
    ## for the Doc3DShade dataset, you only need to specify the path of the image rendered from a PDF, for example:
    {
        "gt_path": "appearance/clean_pdfs/1.jpg"
    },

    ## for the RealDAE dataset, you need to specify the paths of both input and gt, for example:
    {
        "in_path": "appearance/realdae/1_in.jpg",
        "gt_path": "appearance/realdae/1_gt.jpg"
    }
]
```

### Deblurring
- TDD
- JSON preparation
```
[   ## you need to specify the paths of 'in_path' and 'gt_path', for example:
    {
        "in_path": "debluring/tdd/im/00004.png",
        "gt_path": "debluring/tdd/gt/00004.png"
    }
]
```
### Binarization
- Bickly
    - DTSPrompt preparation: since the DTSPrompt for binarization is time-consuming to compute, we obtain it offline before training; use `data/preprocess/sauvola_binarize.py`
- DIBCO
    - DTSPrompt preparation: the same as Bickly
- Noise Office
    - DTSPrompt preparation: the same as Bickly
- PHIDB
    - DTSPrompt preparation: the same as Bickly
- MSI
    - DTSPrompt preparation: the same as Bickly
- JSON preparation
```
[
    ## you need to specify the paths of 'in_path', 'gt_path', 'bin_path', 'thr_path' and 'gradient_path', for example:
    {
        "in_path": "binarization/noise_office/imgs/1.png",
        "gt_path": "binarization/noise_office/gt_imgs/1.png",
        "bin_path": "binarization/noise_office/imgs/1_bin.png",
        "thr_path": "binarization/noise_office/imgs/1_thr.png",
        "gradient_path": "binarization/noise_office/imgs/1_gradient.png"
    }
]
```

After all the data are prepared, you should specify `dataset_setting` in `train.py`.
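As a small illustration of the `_in`/`_gt` naming convention described above for the evaluation sets; the source directory layout and filenames here are assumptions, adapt them to how each dataset is actually distributed:

```python
import os
import shutil

# hypothetical originals: eval/dir300_raw/input/1.png and eval/dir300_raw/gt/1.png
src_in, src_gt, dst = 'eval/dir300_raw/input', 'eval/dir300_raw/gt', 'data/eval/dir300'
os.makedirs(dst, exist_ok=True)
for name in os.listdir(src_in):
    stem, ext = os.path.splitext(name)
    shutil.copy(os.path.join(src_in, name), os.path.join(dst, f'{stem}_in{ext}'))
    shutil.copy(os.path.join(src_gt, name), os.path.join(dst, f'{stem}_gt{ext}'))
```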
data/preprocess/crop_merge_image.py
ADDED
@@ -0,0 +1,142 @@
import os

import cv2
import numpy as np

# SIZE = 256
# BATCH_SIZE = 32
# STRIDES = 256


def split_img(img, size_x, size_y, strides):
    """Split an image into (size_y, size_x) patches sampled every `strides` pixels."""
    max_y, max_x = img.shape[:2]
    border_y = 0
    if max_y % size_y != 0:
        border_y = size_y - (max_y % size_y)
        img = cv2.copyMakeBorder(img, border_y, 0, 0, 0, cv2.BORDER_REPLICATE)
        # img = cv2.copyMakeBorder(img, border_y, 0, 0, 0, cv2.BORDER_CONSTANT, value=[255, 255, 255])
    border_x = 0
    if max_x % size_x != 0:
        border_x = size_x - (max_x % size_x)
        # img = cv2.copyMakeBorder(img, 0, 0, border_x, 0, cv2.BORDER_CONSTANT, value=[255, 255, 255])
        img = cv2.copyMakeBorder(img, 0, 0, border_x, 0, cv2.BORDER_REPLICATE)
    # h, w after padding
    max_y, max_x = img.shape[:2]
    parts = []
    curr_y = 0
    x = 0
    y = 0
    # TODO: rewrite with generators.
    while (curr_y + size_y) <= max_y:
        curr_x = 0
        while (curr_x + size_x) <= max_x:
            parts.append(img[curr_y:curr_y + size_y, curr_x:curr_x + size_x])
            curr_x += strides
        y += 1
        curr_y += strides
    # parts is a list of (windows_number_x * windows_number_y) patches of shape (size_y, size_x, 3)
    return parts, border_x, border_y, max_x, max_y


def combine_imgs(border_x, border_y, imgs, max_y, max_x, size_x, size_y, strides):
    """Merge overlapping patches back into a (max_y, max_x) image using per-pixel overlap weights."""
    index = int(size_x / strides)
    weight_img = np.ones(shape=(max_y, max_x))
    weight_img[0:strides] = index
    weight_img[-strides:] = index
    weight_img[:, 0:strides] = index
    weight_img[:, -strides:] = index

    # border weights
    i = 0
    for j in range(1, index + 1):
        # top-left
        weight_img[0:strides, i:i + strides] = np.ones(shape=(strides, strides)) * j
        weight_img[i:i + strides, 0:strides] = np.ones(shape=(strides, strides)) * j
        # top-right
        weight_img[i:i + strides, -strides:] = np.ones(shape=(strides, strides)) * j
        if i == 0:
            weight_img[0:strides, -strides:] = np.ones(shape=(strides, strides)) * j
        else:
            weight_img[0:strides, -strides - i:-i] = np.ones(shape=(strides, strides)) * j
        # bottom-left
        weight_img[-strides:, i:i + strides] = np.ones(shape=(strides, strides)) * j
        if i == 0:
            weight_img[-strides:, 0:strides] = np.ones(shape=(strides, strides)) * j
        else:
            weight_img[-strides - i:-i:, 0:strides] = np.ones(shape=(strides, strides)) * j
        # bottom-right
        if i == 0:
            weight_img[-strides:, -strides:] = np.ones(shape=(strides, strides)) * j
        else:
            weight_img[-strides - i:-i, -strides:] = np.ones(shape=(strides, strides)) * j
            weight_img[-strides:, -strides - i:-i] = np.ones(shape=(strides, strides)) * j

        i += strides

    for i in range(strides, max_y - strides, strides):
        for j in range(strides, max_x - strides, strides):
            weight_img[i:i + strides, j:j + strides] = np.ones(shape=(strides, strides)) * weight_img[i][0] * weight_img[0][j]

    if len(imgs[0].shape) == 2:
        new_img = np.zeros(shape=(max_y, max_x))
        weight_img = (1 / weight_img)
    else:
        new_img = np.zeros(shape=(max_y, max_x, imgs[0].shape[-1]))
        weight_img = (1 / weight_img).reshape((max_y, max_x, 1))
        weight_img = np.tile(weight_img, (1, 1, imgs[0].shape[-1]))

    curr_y = 0
    x = 0
    y = 0
    i = 0
    # TODO: rewrite with generators.
    while (curr_y + size_y) <= max_y:
        curr_x = 0
        while (curr_x + size_x) <= max_x:
            new_img[curr_y:curr_y + size_y, curr_x:curr_x + size_x] += weight_img[curr_y:curr_y + size_y, curr_x:curr_x + size_x] * imgs[i]
            i += 1
            curr_x += strides
        y += 1
        curr_y += strides

    new_img = new_img[border_y:, border_x:]
    return new_img


def stride_integral(img, stride=32):
    """Pad an image (replicate border) so that height and width are multiples of `stride`."""
    h, w = img.shape[:2]

    if (h % stride) != 0:
        padding_h = stride - (h % stride)
        img = cv2.copyMakeBorder(img, padding_h, 0, 0, 0, borderType=cv2.BORDER_REPLICATE)
    else:
        padding_h = 0

    if (w % stride) != 0:
        padding_w = stride - (w % stride)
        img = cv2.copyMakeBorder(img, 0, 0, padding_w, 0, borderType=cv2.BORDER_REPLICATE)
    else:
        padding_w = 0

    return img, padding_h, padding_w


def mkdir_s(path: str):
    """Create directory in specified path, if not exists."""
    if not os.path.exists(path):
        os.makedirs(path)


if __name__ == '__main__':
    # `im` must be an image loaded beforehand, e.g. im = cv2.imread('some_document.png')
    parts, border_x, border_y, max_x, max_y = split_img(im, 512, 512, strides=512)
    result = combine_imgs(border_x, border_y, parts, max_y, max_x, 512, 512, 512)
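For reference, a round trip through the helpers above could look like the following sketch; the input path and patch size are placeholders, and the patches in `parts` would normally be replaced by model outputs before merging.

```python
import cv2
from data.preprocess.crop_merge_image import split_img, combine_imgs, stride_integral

im = cv2.imread('some_document.png')  # placeholder path

# cut the image into overlapping 256x256 patches sampled every 128 pixels
parts, border_x, border_y, max_x, max_y = split_img(im, 256, 256, strides=128)
# ... run a patch-based model over `parts` here ...
merged = combine_imgs(border_x, border_y, parts, max_y, max_x, 256, 256, 128)

# stride_integral is the variant used by eval.py/inference.py: pad so both sides are multiples of 8
padded, pad_h, pad_w = stride_integral(im, stride=8)
unpadded = padded[pad_h:, pad_w:]  # crop the padding back off afterwards
```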
data/preprocess/sauvola_binarize.py
ADDED
@@ -0,0 +1,91 @@
# required libraries
import glob
import os

import cv2
import numpy as np
from skimage import io
from skimage.filters import threshold_sauvola
from tqdm import tqdm


def SauvolaModBinarization(image, n1=51, n2=51, k1=0.3, k2=0.3, default=True):
    '''
    Binarization using Sauvola's algorithm
    @name : SauvolaModBinarization
    parameters
    @param image (numpy array of shape (3/1) of type np.uint8): color or grayscale image
    optional parameters
    @param n1 (int): window size for running Sauvola during the first pass
    @param n2 (int): window size for running Sauvola during the second pass
    @param k1 (float): k value corresponding to Sauvola during the first pass
    @param k2 (float): k value corresponding to Sauvola during the second pass
    @param default (bool): boolean variable that resets the above optional parameters.
        When default is True, (n1, n2, k1, k2) are set to
            n1 = 5% of min(image height, image width)
            n2 = 10% of min(image height, image width)
            k1 = 0.5
            k2 = 0.5
    Returns
    @return A binary image of the same size as @param image

    @cite https://drive.google.com/file/d/1D3CyI5vtodPJeZaD2UV5wdcaIMtkBbdZ/view?usp=sharing
    '''
    if (default):
        n1 = int(0.05 * min(image.shape[0], image.shape[1]))
        if (n1 % 2 == 0):
            n1 = n1 + 1
        n2 = int(0.1 * min(image.shape[0], image.shape[1]))
        if (n2 % 2 == 0):
            n2 = n2 + 1
        k1 = 0.5
        k2 = 0.5
    if (image.ndim == 3):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = np.copy(image)
    T1 = threshold_sauvola(gray, window_size=n1, k=k1)
    max_val = np.amax(gray)
    min_val = np.amin(gray)
    C = np.copy(T1)
    C = C.astype(np.float32)
    C[gray > T1] = (gray[gray > T1] - T1[gray > T1]) / (max_val - T1[gray > T1])
    C[gray <= T1] = 0
    C = C * 255.0
    new_in = np.copy(C.astype(np.uint8))
    T2 = threshold_sauvola(new_in, window_size=n2, k=k2)
    binary = np.copy(gray)
    binary[new_in <= T2] = 0
    binary[new_in > T2] = 255
    return binary, T2


def dtprompt(img):
    # Sobel gradients in x and y, converted back to uint8 and averaged
    x = cv2.Sobel(img, cv2.CV_16S, 1, 0)
    y = cv2.Sobel(img, cv2.CV_16S, 0, 1)
    absX = cv2.convertScaleAbs(x)
    absY = cv2.convertScaleAbs(y)
    high_frequency = cv2.addWeighted(absX, 0.5, absY, 0.5, 0)
    high_frequency = cv2.cvtColor(high_frequency, cv2.COLOR_BGR2GRAY)
    return high_frequency


im_paths = glob.glob('imgs/*')


for im_path in tqdm(im_paths):
    # skip DTSPrompt files produced by a previous run
    if '_bin.' in im_path:
        continue
    if '_thr.' in im_path:
        continue
    if '_gradient.' in im_path:
        continue

    im = cv2.imread(im_path)
    result, thresh = SauvolaModBinarization(im)
    gradient = dtprompt(im)
    thresh = thresh.astype(np.uint8)
    cv2.imwrite(im_path.replace('.', '_bin.'), result)
    cv2.imwrite(im_path.replace('.', '_thr.'), thresh)
    cv2.imwrite(im_path.replace('.', '_gradient.'), gradient)
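Note that the script has no `if __name__ == '__main__'` guard, so it is meant to be run directly from the folder containing `imgs/` rather than imported. A small follow-up sketch (directory layout and output name are assumptions) shows how the generated `_bin`/`_thr`/`_gradient` files map onto the binarization JSON fields described in `data/README.md`:

```python
import glob
import json

entries = []
for in_path in sorted(glob.glob('binarization/noise_office/imgs/*.png')):
    # skip the DTSPrompt images written by sauvola_binarize.py
    if any(tag in in_path for tag in ('_bin.', '_thr.', '_gradient.')):
        continue
    entries.append({
        'in_path': in_path,
        'gt_path': in_path.replace('/imgs/', '/gt_imgs/'),
        'bin_path': in_path.replace('.png', '_bin.png'),
        'thr_path': in_path.replace('.png', '_thr.png'),
        'gradient_path': in_path.replace('.png', '_gradient.png'),
    })

with open('binarization_noise_office.json', 'w') as f:
    json.dump(entries, f, indent=4)
```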
data/preprocess/shadow_extraction.py
ADDED
@@ -0,0 +1,68 @@
import glob
import os
import random

import cv2
import numpy as np
from tqdm import tqdm

# Extract shadow layers from Doc3DShade renderings: for every rendered image the
# shadow map is the per-pixel ratio between the rendering and its albedo.
im_paths = glob.glob('./img/*/*')

random.shuffle(im_paths)

for im_path in tqdm(im_paths):
    # im_path = './img/1/23-180_5-y4_Page_034-wVO0001-L1_3-T_6600-I_5535.png'
    if '-L1_' in im_path:
        alb_path = im_path.split('-L1_')[0].replace('img/', 'alb/') + '.png'
    else:
        alb_path = im_path.split('-L2_')[0].replace('img/', 'alb/') + '.png'

    if not os.path.exists(alb_path):
        print(im_path)
        print(alb_path)

    im = cv2.imread(im_path)
    alb = cv2.imread(alb_path)
    _, mask = cv2.threshold(cv2.cvtColor(alb, cv2.COLOR_BGR2GRAY), 1, 255, cv2.THRESH_BINARY)

    ## clean
    # std = np.max(np.std(alb,axis=-1))
    # print(std)
    im_min = np.min(im, axis=-1)
    kernel = np.ones((3, 3))
    mask_erode = cv2.dilate(mask, kernel=kernel)
    mask_erode = cv2.erode(mask_erode, kernel=kernel)
    mask_erode = cv2.erode(mask_erode, iterations=4, kernel=kernel)
    metric = np.min(im_min[mask_erode == 255])
    metric_num = 0
    if metric == 0 or metric == 1:
        metric_num = np.sum(im_min[mask_erode == 255] == metric)
    if metric_num >= 20:
        # too many near-black pixels inside the document region: treat the sample as unreliable and park it in temp/
        alb_temp = alb.astype(np.float64)
        alb_temp[alb_temp == 0] = alb_temp[alb_temp == 0] + 1e-5
        shadow = np.clip(im.astype(np.float64) / alb_temp, 0, 1)
        shadow = (shadow * 255).astype(np.uint8)

        shadow_path = im_path.replace('img/', 'temp/')
        cv2.imwrite(shadow_path, shadow)
        continue

    alb_temp = alb.astype(np.float64)
    alb_temp[alb_temp == 0] = alb_temp[alb_temp == 0] + 1e-5
    shadow = np.clip(im.astype(np.float64) / alb_temp, 0, 1)
    shadow = (shadow * 255).astype(np.uint8)

    shadow_path = im_path.replace('img/', 'shadow/')
    cv2.imwrite(shadow_path, shadow)

    mask_path = im_path.replace('img/', 'mask/')
    cv2.imwrite(mask_path, mask)

    # cv2.imshow('im', im)
    # cv2.imshow('alb', alb)
    # cv2.imshow('shadow', shadow)
    # cv2.imshow('mask_erode', mask_erode)
    # print(im_min[mask_erode==255])
    # print(metric, metric_num)
    # cv2.waitKey(0)
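The extracted layer is a multiplicative shadow (the rendering divided by its albedo, clipped to [0, 1]), so it can later be re-applied to a clean page when synthesizing appearance-enhancement training data. A rough sketch is below; the file names are placeholders, and the actual augmentation used for training lives in `loaders/docres_loader.py`.

```python
import cv2
import numpy as np

clean = cv2.imread('appearance/clean_pdfs/1.jpg').astype(np.float64) / 255.0  # clean PDF page
shadow = cv2.imread('extracted_shadow.png').astype(np.float64) / 255.0        # output of this script
shadow = cv2.resize(shadow, (clean.shape[1], clean.shape[0]))

# image ≈ albedo * shadow, so multiplying the clean page by the shadow layer degrades it
degraded = np.clip(clean * shadow, 0, 1)
cv2.imwrite('synth_input.jpg', (degraded * 255).astype(np.uint8))
```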
eval.py
ADDED
@@ -0,0 +1,369 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import glob
|
4 |
+
import utils
|
5 |
+
import argparse
|
6 |
+
import numpy as np
|
7 |
+
from tqdm import tqdm
|
8 |
+
from skimage.metrics import structural_similarity,peak_signal_noise_ratio
|
9 |
+
|
10 |
+
import torch
|
11 |
+
|
12 |
+
from utils import convert_state_dict
|
13 |
+
from models import restormer_arch
|
14 |
+
from data.preprocess.crop_merge_image import stride_integral
|
15 |
+
|
16 |
+
os.sys.path.append('./data/MBD/')
|
17 |
+
from data.MBD.infer import net1_net2_infer_single_im
|
18 |
+
|
19 |
+
|
20 |
+
def dewarp_prompt(img):
|
21 |
+
mask = net1_net2_infer_single_im(img,'data/MBD/checkpoint/mbd.pkl')
|
22 |
+
base_coord = utils.getBasecoord(256,256)/256
|
23 |
+
img[mask==0]=0
|
24 |
+
mask = cv2.resize(mask,(256,256))/255
|
25 |
+
return img,np.concatenate((base_coord,np.expand_dims(mask,-1)),-1)
|
26 |
+
|
27 |
+
def deshadow_prompt(img):
|
28 |
+
h,w = img.shape[:2]
|
29 |
+
# img = cv2.resize(img,(128,128))
|
30 |
+
img = cv2.resize(img,(1024,1024))
|
31 |
+
rgb_planes = cv2.split(img)
|
32 |
+
result_planes = []
|
33 |
+
result_norm_planes = []
|
34 |
+
bg_imgs = []
|
35 |
+
for plane in rgb_planes:
|
36 |
+
dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
|
37 |
+
bg_img = cv2.medianBlur(dilated_img, 21)
|
38 |
+
bg_imgs.append(bg_img)
|
39 |
+
diff_img = 255 - cv2.absdiff(plane, bg_img)
|
40 |
+
norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
|
41 |
+
result_planes.append(diff_img)
|
42 |
+
result_norm_planes.append(norm_img)
|
43 |
+
bg_imgs = cv2.merge(bg_imgs)
|
44 |
+
bg_imgs = cv2.resize(bg_imgs,(w,h))
|
45 |
+
# result = cv2.merge(result_planes)
|
46 |
+
result_norm = cv2.merge(result_norm_planes)
|
47 |
+
result_norm[result_norm==0]=1
|
48 |
+
shadow_map = np.clip(img.astype(float)/result_norm.astype(float)*255,0,255).astype(np.uint8)
|
49 |
+
shadow_map = cv2.resize(shadow_map,(w,h))
|
50 |
+
shadow_map = cv2.cvtColor(shadow_map,cv2.COLOR_BGR2GRAY)
|
51 |
+
shadow_map = cv2.cvtColor(shadow_map,cv2.COLOR_GRAY2BGR)
|
52 |
+
# return shadow_map
|
53 |
+
return bg_imgs
|
54 |
+
|
55 |
+
def deblur_prompt(img):
|
56 |
+
x = cv2.Sobel(img,cv2.CV_16S,1,0)
|
57 |
+
y = cv2.Sobel(img,cv2.CV_16S,0,1)
|
58 |
+
absX = cv2.convertScaleAbs(x) # 转回uint8
|
59 |
+
absY = cv2.convertScaleAbs(y)
|
60 |
+
high_frequency = cv2.addWeighted(absX,0.5,absY,0.5,0)
|
61 |
+
high_frequency = cv2.cvtColor(high_frequency,cv2.COLOR_BGR2GRAY)
|
62 |
+
high_frequency = cv2.cvtColor(high_frequency,cv2.COLOR_GRAY2BGR)
|
63 |
+
return high_frequency
|
64 |
+
|
65 |
+
def appearance_prompt(img):
|
66 |
+
h,w = img.shape[:2]
|
67 |
+
# img = cv2.resize(img,(128,128))
|
68 |
+
img = cv2.resize(img,(1024,1024))
|
69 |
+
rgb_planes = cv2.split(img)
|
70 |
+
result_planes = []
|
71 |
+
result_norm_planes = []
|
72 |
+
for plane in rgb_planes:
|
73 |
+
dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
|
74 |
+
bg_img = cv2.medianBlur(dilated_img, 21)
|
75 |
+
diff_img = 255 - cv2.absdiff(plane, bg_img)
|
76 |
+
norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
|
77 |
+
result_planes.append(diff_img)
|
78 |
+
result_norm_planes.append(norm_img)
|
79 |
+
result_norm = cv2.merge(result_norm_planes)
|
80 |
+
result_norm = cv2.resize(result_norm,(w,h))
|
81 |
+
return result_norm
|
82 |
+
|
83 |
+
def binarization_promptv2(img):
|
84 |
+
result,thresh = utils.SauvolaModBinarization(img)
|
85 |
+
thresh = thresh.astype(np.uint8)
|
86 |
+
result[result>155]=255
|
87 |
+
result[result<=155]=0
|
88 |
+
|
89 |
+
x = cv2.Sobel(img,cv2.CV_16S,1,0)
|
90 |
+
y = cv2.Sobel(img,cv2.CV_16S,0,1)
|
91 |
+
absX = cv2.convertScaleAbs(x) # 转回uint8
|
92 |
+
absY = cv2.convertScaleAbs(y)
|
93 |
+
high_frequency = cv2.addWeighted(absX,0.5,absY,0.5,0)
|
94 |
+
high_frequency = cv2.cvtColor(high_frequency,cv2.COLOR_BGR2GRAY)
|
95 |
+
return np.concatenate((np.expand_dims(thresh,-1),np.expand_dims(high_frequency,-1),np.expand_dims(result,-1)),-1)
|
96 |
+
|
97 |
+
def dewarping(model,im_path):
|
98 |
+
INPUT_SIZE=256
|
99 |
+
im_org = cv2.imread(im_path)
|
100 |
+
im_masked, prompt_org = dewarp_prompt(im_org.copy())
|
101 |
+
|
102 |
+
h,w = im_masked.shape[:2]
|
103 |
+
im_masked = im_masked.copy()
|
104 |
+
im_masked = cv2.resize(im_masked,(INPUT_SIZE,INPUT_SIZE))
|
105 |
+
im_masked = im_masked / 255.0
|
106 |
+
im_masked = torch.from_numpy(im_masked.transpose(2,0,1)).unsqueeze(0)
|
107 |
+
im_masked = im_masked.float().to(DEVICE)
|
108 |
+
|
109 |
+
prompt = torch.from_numpy(prompt_org.transpose(2,0,1)).unsqueeze(0)
|
110 |
+
prompt = prompt.float().to(DEVICE)
|
111 |
+
|
112 |
+
in_im = torch.cat((im_masked,prompt),dim=1)
|
113 |
+
|
114 |
+
# inference
|
115 |
+
base_coord = utils.getBasecoord(INPUT_SIZE,INPUT_SIZE)/INPUT_SIZE
|
116 |
+
model = model.float()
|
117 |
+
with torch.no_grad():
|
118 |
+
pred = model(in_im)
|
119 |
+
pred = pred[0][:2].permute(1,2,0).cpu().numpy()
|
120 |
+
pred = pred+base_coord
|
121 |
+
## smooth
|
122 |
+
for i in range(15):
|
123 |
+
pred = cv2.blur(pred,(3,3),borderType=cv2.BORDER_REPLICATE)
|
124 |
+
pred = cv2.resize(pred,(w,h))*(w,h)
|
125 |
+
pred = pred.astype(np.float32)
|
126 |
+
out_im = cv2.remap(im_org,pred[:,:,0],pred[:,:,1],cv2.INTER_LINEAR)
|
127 |
+
|
128 |
+
prompt_org = (prompt_org*255).astype(np.uint8)
|
129 |
+
prompt_org = cv2.resize(prompt_org,im_org.shape[:2][::-1])
|
130 |
+
|
131 |
+
return prompt_org[:,:,0],prompt_org[:,:,1],prompt_org[:,:,2],out_im
|
132 |
+
|
133 |
+
def appearance(model,im_path):
|
134 |
+
MAX_SIZE=1600
|
135 |
+
# obtain im and prompt
|
136 |
+
im_org = cv2.imread(im_path)
|
137 |
+
h,w = im_org.shape[:2]
|
138 |
+
prompt = appearance_prompt(im_org)
|
139 |
+
in_im = np.concatenate((im_org,prompt),-1)
|
140 |
+
|
141 |
+
# constrain the max resolution
|
142 |
+
if max(w,h) < MAX_SIZE:
|
143 |
+
in_im,padding_h,padding_w = stride_integral(in_im,8)
|
144 |
+
else:
|
145 |
+
in_im = cv2.resize(in_im,(MAX_SIZE,MAX_SIZE))
|
146 |
+
|
147 |
+
# normalize
|
148 |
+
in_im = in_im / 255.0
|
149 |
+
in_im = torch.from_numpy(in_im.transpose(2,0,1)).unsqueeze(0)
|
150 |
+
|
151 |
+
# inference
|
152 |
+
in_im = in_im.half().to(DEVICE)
|
153 |
+
model = model.half()
|
154 |
+
with torch.no_grad():
|
155 |
+
pred = model(in_im)
|
156 |
+
pred = torch.clamp(pred,0,1)
|
157 |
+
pred = pred[0].permute(1,2,0).cpu().numpy()
|
158 |
+
pred = (pred*255).astype(np.uint8)
|
159 |
+
|
160 |
+
if max(w,h) < MAX_SIZE:
|
161 |
+
out_im = pred[padding_h:,padding_w:]
|
162 |
+
else:
|
163 |
+
pred[pred==0] = 1
|
164 |
+
shadow_map = cv2.resize(im_org,(MAX_SIZE,MAX_SIZE)).astype(float)/pred.astype(float)
|
165 |
+
shadow_map = cv2.resize(shadow_map,(w,h))
|
166 |
+
shadow_map[shadow_map==0]=0.00001
|
167 |
+
out_im = np.clip(im_org.astype(float)/shadow_map,0,255).astype(np.uint8)
|
168 |
+
|
169 |
+
return prompt[:,:,0],prompt[:,:,1],prompt[:,:,2],out_im
|
170 |
+
|
171 |
+
|
172 |
+
def deshadowing(model,im_path):
|
173 |
+
MAX_SIZE=1600
|
174 |
+
# obtain im and prompt
|
175 |
+
im_org = cv2.imread(im_path)
|
176 |
+
h,w = im_org.shape[:2]
|
177 |
+
prompt = deshadow_prompt(im_org)
|
178 |
+
in_im = np.concatenate((im_org,prompt),-1)
|
179 |
+
|
180 |
+
# constrain the max resolution
|
181 |
+
if max(w,h) < MAX_SIZE:
|
182 |
+
in_im,padding_h,padding_w = stride_integral(in_im,8)
|
183 |
+
else:
|
184 |
+
in_im = cv2.resize(in_im,(MAX_SIZE,MAX_SIZE))
|
185 |
+
|
186 |
+
# normalize
|
187 |
+
in_im = in_im / 255.0
|
188 |
+
in_im = torch.from_numpy(in_im.transpose(2,0,1)).unsqueeze(0)
|
189 |
+
|
190 |
+
# inference
|
191 |
+
in_im = in_im.half().to(DEVICE)
|
192 |
+
model = model.half()
|
193 |
+
with torch.no_grad():
|
194 |
+
pred = model(in_im)
|
195 |
+
pred = torch.clamp(pred,0,1)
|
196 |
+
pred = pred[0].permute(1,2,0).cpu().numpy()
|
197 |
+
pred = (pred*255).astype(np.uint8)
|
198 |
+
|
199 |
+
if max(w,h) < MAX_SIZE:
|
200 |
+
out_im = pred[padding_h:,padding_w:]
|
201 |
+
else:
|
202 |
+
pred[pred==0]=1
|
203 |
+
shadow_map = cv2.resize(im_org,(MAX_SIZE,MAX_SIZE)).astype(float)/pred.astype(float)
|
204 |
+
shadow_map = cv2.resize(shadow_map,(w,h))
|
205 |
+
shadow_map[shadow_map==0]=0.00001
|
206 |
+
out_im = np.clip(im_org.astype(float)/shadow_map,0,255).astype(np.uint8)
|
207 |
+
|
208 |
+
return prompt[:,:,0],prompt[:,:,1],prompt[:,:,2],out_im
|
209 |
+
|
210 |
+
|
211 |
+
def deblurring(model,im_path):
|
212 |
+
# setup image
|
213 |
+
im_org = cv2.imread(im_path)
|
214 |
+
in_im,padding_h,padding_w = stride_integral(im_org,8)
|
215 |
+
prompt = deblur_prompt(in_im)
|
216 |
+
in_im = np.concatenate((in_im,prompt),-1)
|
217 |
+
in_im = in_im / 255.0
|
218 |
+
in_im = torch.from_numpy(in_im.transpose(2,0,1)).unsqueeze(0)
|
219 |
+
in_im = in_im.half().to(DEVICE)
|
220 |
+
# inference
|
221 |
+
model.to(DEVICE)
|
222 |
+
model.eval()
|
223 |
+
model = model.half()
|
224 |
+
with torch.no_grad():
|
225 |
+
pred = model(in_im)
|
226 |
+
pred = torch.clamp(pred,0,1)
|
227 |
+
pred = pred[0].permute(1,2,0).cpu().numpy()
|
228 |
+
pred = (pred*255).astype(np.uint8)
|
229 |
+
out_im = pred[padding_h:,padding_w:]
|
230 |
+
|
231 |
+
return prompt[:,:,0],prompt[:,:,1],prompt[:,:,2],out_im
|
232 |
+
|
233 |
+
|
234 |
+
|
235 |
+
def binarization(model,im_path):
|
236 |
+
im_org = cv2.imread(im_path)
|
237 |
+
im,padding_h,padding_w = stride_integral(im_org,8)
|
238 |
+
prompt = binarization_promptv2(im)
|
239 |
+
h,w = im.shape[:2]
|
240 |
+
in_im = np.concatenate((im,prompt),-1)
|
241 |
+
|
242 |
+
in_im = in_im / 255.0
|
243 |
+
in_im = torch.from_numpy(in_im.transpose(2,0,1)).unsqueeze(0)
|
244 |
+
in_im = in_im.to(DEVICE)
|
245 |
+
model = model.half()
|
246 |
+
in_im = in_im.half()
|
247 |
+
with torch.no_grad():
|
248 |
+
pred = model(in_im,'binarization')
|
249 |
+
pred = pred[:,:2,:,:]
|
250 |
+
pred = torch.max(torch.softmax(pred,1),1)[1]
|
251 |
+
pred = pred[0].cpu().numpy()
|
252 |
+
pred = (pred*255).astype(np.uint8)
|
253 |
+
pred = cv2.resize(pred,(w,h))
|
254 |
+
out_im = pred[padding_h:,padding_w:]
|
255 |
+
|
256 |
+
return prompt[:,:,0],prompt[:,:,1],prompt[:,:,2],out_im
|
257 |
+
|
258 |
+
|
259 |
+
|
260 |
+
|
261 |
+
|
262 |
+
def get_args():
|
263 |
+
parser = argparse.ArgumentParser(description='Params')
|
264 |
+
parser.add_argument('--model_path', nargs='?', type=str, default='./checkpoints/docres.pkl',help='Path of the saved checkpoint')
|
265 |
+
parser.add_argument('--dataset', nargs='?', type=str, default='./distorted/',help='Path of input document image')
|
266 |
+
args = parser.parse_args()
|
267 |
+
assert args.dataset in all_datasets.keys(), 'Unregisted dataset, dataset must be one of '+', '.join(all_datasets)
|
268 |
+
return args
|
269 |
+
|
270 |
+
def model_init(args):
|
271 |
+
# prepare model
|
272 |
+
model = restormer_arch.Restormer(
|
273 |
+
inp_channels=6,
|
274 |
+
out_channels=3,
|
275 |
+
dim = 48,
|
276 |
+
num_blocks = [2,3,3,4],
|
277 |
+
num_refinement_blocks = 4,
|
278 |
+
heads = [1,2,4,8],
|
279 |
+
ffn_expansion_factor = 2.66,
|
280 |
+
bias = False,
|
281 |
+
LayerNorm_type = 'WithBias',
|
282 |
+
dual_pixel_task = True
|
283 |
+
)
|
284 |
+
|
285 |
+
if DEVICE.type == 'cpu':
|
286 |
+
state = convert_state_dict(torch.load(args.model_path, map_location='cpu')['model_state'])
|
287 |
+
else:
|
288 |
+
state = convert_state_dict(torch.load(args.model_path, map_location='cuda:0')['model_state'])
|
289 |
+
model.load_state_dict(state)
|
290 |
+
|
291 |
+
model.eval()
|
292 |
+
model = model.to(DEVICE)
|
293 |
+
return model
|
294 |
+
|
295 |
+
def inference_one_im(model,im_path,task):
|
296 |
+
if task=='dewarping':
|
297 |
+
prompt1,prompt2,prompt3,restorted = dewarping(model,im_path)
|
298 |
+
elif task=='deshadowing':
|
299 |
+
prompt1,prompt2,prompt3,restorted = deshadowing(model,im_path)
|
300 |
+
elif task=='appearance':
|
301 |
+
prompt1,prompt2,prompt3,restorted = appearance(model,im_path)
|
302 |
+
elif task=='deblurring':
|
303 |
+
prompt1,prompt2,prompt3,restorted = deblurring(model,im_path)
|
304 |
+
elif task=='binarization':
|
305 |
+
prompt1,prompt2,prompt3,restorted = binarization(model,im_path)
|
306 |
+
elif task=='end2end':
|
307 |
+
prompt1,prompt2,prompt3,restorted = dewarping(model,im_path)
|
308 |
+
cv2.imwrite('./temp.jpg',restorted)
|
309 |
+
prompt1,prompt2,prompt3,restorted = deshadowing(model,'./temp.jpg')
|
310 |
+
cv2.imwrite('./temp.jpg',restorted)
|
311 |
+
prompt1,prompt2,prompt3,restorted = appearance(model,'./temp.jpg')
|
312 |
+
os.remove('./temp.jpg')
|
313 |
+
|
314 |
+
return prompt1,prompt2,prompt3,restorted
|
315 |
+
|
316 |
+
|
317 |
+
|
318 |
+
if __name__ == '__main__':
|
319 |
+
all_datasets = {'dir300':'dewarping','kligler':'deshadowing','jung':'deshadowing','osr':'deshadowing','docunet_docaligner':'appearance','realdae':'appearance','tdd':'deblurring','dibco18':'binarization'}
|
320 |
+
|
321 |
+
## model init
|
322 |
+
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
323 |
+
args = get_args()
|
324 |
+
model = model_init(args)
|
325 |
+
|
326 |
+
## inference
|
327 |
+
print('Predicting')
|
328 |
+
task = all_datasets[args.dataset]
|
329 |
+
im_paths = glob.glob(os.path.join('./data/eval/',args.dataset,'*_in.*'))
|
330 |
+
for im_path in tqdm(im_paths):
|
331 |
+
_,_,_,restorted = inference_one_im(model,im_path,task)
|
332 |
+
cv2.imwrite(im_path.replace('_in','_docres'),restorted)
|
333 |
+
|
334 |
+
## obtain metric
|
335 |
+
print('Metric calculating')
|
336 |
+
if task == 'dewarping':
|
337 |
+
exit()
|
338 |
+
elif task=='deshadowing' or task=='appearance' or task=='deblurring':
|
339 |
+
psnr = []
|
340 |
+
ssim = []
|
341 |
+
for im_path in tqdm(im_paths):
|
342 |
+
pred = cv2.imread(im_path.replace('_in','_docres'))
|
343 |
+
gt = cv2.imread(im_path.replace('_in','_gt'))
|
344 |
+
ssim.append(structural_similarity(pred,gt,multichannel=True))
|
345 |
+
psnr.append(peak_signal_noise_ratio(pred, gt))
|
346 |
+
print(args.dataset)
|
347 |
+
print('ssim:',np.mean(ssim))
|
348 |
+
print('psnr:',np.mean(psnr))
|
349 |
+
elif task=='binarization':
|
350 |
+
fmeasures, pfmeasures,psnrs = [],[],[]
|
351 |
+
for im_path in tqdm(im_paths):
|
352 |
+
pred = cv2.imread(im_path.replace('_in','_docres'))
|
353 |
+
gt = cv2.imread(im_path.replace('_in','_gt'))
|
354 |
+
pred = cv2.cvtColor(pred,cv2.COLOR_BGR2GRAY)
|
355 |
+
gt = cv2.cvtColor(gt,cv2.COLOR_BGR2GRAY)
|
356 |
+
pred[pred>155]=255
|
357 |
+
pred[pred<=155]=0
|
358 |
+
gt[gt>155]=255
|
359 |
+
gt[gt<=155]=0
|
360 |
+
fmeasure, pfmeasure,psnr,_,_,_ = utils.bin_metric(pred,gt)
|
361 |
+
fmeasures.append(fmeasure)
|
362 |
+
pfmeasures.append(pfmeasure)
|
363 |
+
psnrs.append(psnr)
|
364 |
+
print(args.dataset)
|
365 |
+
print('fmeasure:',np.mean(fmeasures))
|
366 |
+
print('pfmeasure:',np.mean(pfmeasures))
|
367 |
+
print('psnr:',np.mean(psnrs))
|
368 |
+
|
369 |
+
|
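For evaluation, `eval.py` expects the benchmark images under `./data/eval/<dataset>/` with `*_in.*` inputs and matching `*_gt.*` ground truth, and `--dataset` must be one of the keys registered in `all_datasets`. A typical run (the dataset choice here is only an example) might look like this:

```bash
# restores every *_in.* image to *_docres.*, then reports SSIM/PSNR
# (or F-measure/pF-measure/PSNR for binarization datasets)
python eval.py --model_path ./checkpoints/docres.pkl --dataset kligler
```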
inference.py
ADDED
@@ -0,0 +1,341 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import utils
|
4 |
+
import argparse
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
import torch
|
8 |
+
|
9 |
+
from utils import convert_state_dict
|
10 |
+
from models import restormer_arch
|
11 |
+
from data.preprocess.crop_merge_image import stride_integral
|
12 |
+
|
13 |
+
os.sys.path.append('./data/MBD/')
|
14 |
+
from data.MBD.infer import net1_net2_infer_single_im
|
15 |
+
|
16 |
+
|
17 |
+
def dewarp_prompt(img):
|
18 |
+
mask = net1_net2_infer_single_im(img,'data/MBD/checkpoint/mbd.pkl')
|
19 |
+
base_coord = utils.getBasecoord(256,256)/256
|
20 |
+
img[mask==0]=0
|
21 |
+
mask = cv2.resize(mask,(256,256))/255
|
22 |
+
return img,np.concatenate((base_coord,np.expand_dims(mask,-1)),-1)
|
23 |
+
|
24 |
+
def deshadow_prompt(img):
|
25 |
+
h,w = img.shape[:2]
|
26 |
+
# img = cv2.resize(img,(128,128))
|
27 |
+
img = cv2.resize(img,(1024,1024))
|
28 |
+
rgb_planes = cv2.split(img)
|
29 |
+
result_planes = []
|
30 |
+
result_norm_planes = []
|
31 |
+
bg_imgs = []
|
32 |
+
for plane in rgb_planes:
|
33 |
+
dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
|
34 |
+
bg_img = cv2.medianBlur(dilated_img, 21)
|
35 |
+
bg_imgs.append(bg_img)
|
36 |
+
diff_img = 255 - cv2.absdiff(plane, bg_img)
|
37 |
+
norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
|
38 |
+
result_planes.append(diff_img)
|
39 |
+
result_norm_planes.append(norm_img)
|
40 |
+
bg_imgs = cv2.merge(bg_imgs)
|
41 |
+
bg_imgs = cv2.resize(bg_imgs,(w,h))
|
42 |
+
# result = cv2.merge(result_planes)
|
43 |
+
result_norm = cv2.merge(result_norm_planes)
|
44 |
+
result_norm[result_norm==0]=1
|
45 |
+
shadow_map = np.clip(img.astype(float)/result_norm.astype(float)*255,0,255).astype(np.uint8)
|
46 |
+
shadow_map = cv2.resize(shadow_map,(w,h))
|
47 |
+
shadow_map = cv2.cvtColor(shadow_map,cv2.COLOR_BGR2GRAY)
|
48 |
+
shadow_map = cv2.cvtColor(shadow_map,cv2.COLOR_GRAY2BGR)
|
49 |
+
# return shadow_map
|
50 |
+
return bg_imgs
|
51 |
+
|
52 |
+
def deblur_prompt(img):
|
53 |
+
x = cv2.Sobel(img,cv2.CV_16S,1,0)
|
54 |
+
y = cv2.Sobel(img,cv2.CV_16S,0,1)
|
55 |
+
absX = cv2.convertScaleAbs(x) # 转回uint8
|
56 |
+
absY = cv2.convertScaleAbs(y)
|
57 |
+
high_frequency = cv2.addWeighted(absX,0.5,absY,0.5,0)
|
58 |
+
high_frequency = cv2.cvtColor(high_frequency,cv2.COLOR_BGR2GRAY)
|
59 |
+
high_frequency = cv2.cvtColor(high_frequency,cv2.COLOR_GRAY2BGR)
|
60 |
+
return high_frequency
|
61 |
+
|
62 |
+
def appearance_prompt(img):
|
63 |
+
h,w = img.shape[:2]
|
64 |
+
# img = cv2.resize(img,(128,128))
|
65 |
+
img = cv2.resize(img,(1024,1024))
|
66 |
+
rgb_planes = cv2.split(img)
|
67 |
+
result_planes = []
|
68 |
+
result_norm_planes = []
|
69 |
+
for plane in rgb_planes:
|
70 |
+
dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
|
71 |
+
bg_img = cv2.medianBlur(dilated_img, 21)
|
72 |
+
diff_img = 255 - cv2.absdiff(plane, bg_img)
|
73 |
+
norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
|
74 |
+
result_planes.append(diff_img)
|
75 |
+
result_norm_planes.append(norm_img)
|
76 |
+
result_norm = cv2.merge(result_norm_planes)
|
77 |
+
result_norm = cv2.resize(result_norm,(w,h))
|
78 |
+
return result_norm
|
79 |
+
|
80 |
+
def binarization_promptv2(img):
|
81 |
+
result,thresh = utils.SauvolaModBinarization(img)
|
82 |
+
thresh = thresh.astype(np.uint8)
|
83 |
+
result[result>155]=255
|
84 |
+
result[result<=155]=0
|
85 |
+
|
86 |
+
x = cv2.Sobel(img,cv2.CV_16S,1,0)
|
87 |
+
y = cv2.Sobel(img,cv2.CV_16S,0,1)
|
88 |
+
absX = cv2.convertScaleAbs(x) # 转回uint8
|
89 |
+
absY = cv2.convertScaleAbs(y)
|
90 |
+
high_frequency = cv2.addWeighted(absX,0.5,absY,0.5,0)
|
91 |
+
high_frequency = cv2.cvtColor(high_frequency,cv2.COLOR_BGR2GRAY)
|
92 |
+
return np.concatenate((np.expand_dims(thresh,-1),np.expand_dims(high_frequency,-1),np.expand_dims(result,-1)),-1)
|
93 |
+
|
94 |
+
def dewarping(model,im_path):
|
95 |
+
INPUT_SIZE=256
|
96 |
+
im_org = cv2.imread(im_path)
|
97 |
+
im_masked, prompt_org = dewarp_prompt(im_org.copy())
|
98 |
+
|
99 |
+
h,w = im_masked.shape[:2]
|
100 |
+
im_masked = im_masked.copy()
|
101 |
+
im_masked = cv2.resize(im_masked,(INPUT_SIZE,INPUT_SIZE))
|
102 |
+
im_masked = im_masked / 255.0
|
103 |
+
im_masked = torch.from_numpy(im_masked.transpose(2,0,1)).unsqueeze(0)
|
104 |
+
im_masked = im_masked.float().to(DEVICE)
|
105 |
+
|
106 |
+
prompt = torch.from_numpy(prompt_org.transpose(2,0,1)).unsqueeze(0)
|
107 |
+
prompt = prompt.float().to(DEVICE)
|
108 |
+
|
109 |
+
in_im = torch.cat((im_masked,prompt),dim=1)
|
110 |
+
|
111 |
+
# inference
|
112 |
+
base_coord = utils.getBasecoord(INPUT_SIZE,INPUT_SIZE)/INPUT_SIZE
|
113 |
+
model = model.float()
|
114 |
+
with torch.no_grad():
|
115 |
+
pred = model(in_im)
|
116 |
+
pred = pred[0][:2].permute(1,2,0).cpu().numpy()
|
117 |
+
pred = pred+base_coord
|
118 |
+
## smooth
|
119 |
+
for i in range(15):
|
120 |
+
pred = cv2.blur(pred,(3,3),borderType=cv2.BORDER_REPLICATE)
|
121 |
+
pred = cv2.resize(pred,(w,h))*(w,h)
|
122 |
+
pred = pred.astype(np.float32)
|
123 |
+
out_im = cv2.remap(im_org,pred[:,:,0],pred[:,:,1],cv2.INTER_LINEAR)
|
124 |
+
|
125 |
+
prompt_org = (prompt_org*255).astype(np.uint8)
|
126 |
+
prompt_org = cv2.resize(prompt_org,im_org.shape[:2][::-1])
|
127 |
+
|
128 |
+
return prompt_org[:,:,0],prompt_org[:,:,1],prompt_org[:,:,2],out_im
|
129 |
+
|
130 |
+
def appearance(model,im_path):
|
131 |
+
MAX_SIZE=1600
|
132 |
+
# obtain im and prompt
|
133 |
+
im_org = cv2.imread(im_path)
|
134 |
+
h,w = im_org.shape[:2]
|
135 |
+
prompt = appearance_prompt(im_org)
|
136 |
+
in_im = np.concatenate((im_org,prompt),-1)
|
137 |
+
|
138 |
+
# constrain the max resolution
|
139 |
+
if max(w,h) < MAX_SIZE:
|
140 |
+
in_im,padding_h,padding_w = stride_integral(in_im,8)
|
141 |
+
else:
|
142 |
+
in_im = cv2.resize(in_im,(MAX_SIZE,MAX_SIZE))
|
143 |
+
|
144 |
+
# normalize
|
145 |
+
in_im = in_im / 255.0
|
146 |
+
in_im = torch.from_numpy(in_im.transpose(2,0,1)).unsqueeze(0)
|
147 |
+
|
148 |
+
# inference
|
149 |
+
in_im = in_im.half().to(DEVICE)
|
150 |
+
model = model.half()
|
151 |
+
with torch.no_grad():
|
152 |
+
pred = model(in_im)
|
153 |
+
pred = torch.clamp(pred,0,1)
|
154 |
+
pred = pred[0].permute(1,2,0).cpu().numpy()
|
155 |
+
pred = (pred*255).astype(np.uint8)
|
156 |
+
|
157 |
+
if max(w,h) < MAX_SIZE:
|
158 |
+
out_im = pred[padding_h:,padding_w:]
|
159 |
+
else:
|
160 |
+
pred[pred==0] = 1
|
161 |
+
shadow_map = cv2.resize(im_org,(MAX_SIZE,MAX_SIZE)).astype(float)/pred.astype(float)
|
162 |
+
shadow_map = cv2.resize(shadow_map,(w,h))
|
163 |
+
shadow_map[shadow_map==0]=0.00001
|
164 |
+
out_im = np.clip(im_org.astype(float)/shadow_map,0,255).astype(np.uint8)
|
165 |
+
|
166 |
+
return prompt[:,:,0],prompt[:,:,1],prompt[:,:,2],out_im
|
167 |
+
|
168 |
+
|
169 |
+
def deshadowing(model,im_path):
|
170 |
+
MAX_SIZE=1600
|
171 |
+
# obtain im and prompt
|
172 |
+
im_org = cv2.imread(im_path)
|
173 |
+
h,w = im_org.shape[:2]
|
174 |
+
prompt = deshadow_prompt(im_org)
|
175 |
+
in_im = np.concatenate((im_org,prompt),-1)
|
176 |
+
|
177 |
+
# constrain the max resolution
|
178 |
+
if max(w,h) < MAX_SIZE:
|
179 |
+
in_im,padding_h,padding_w = stride_integral(in_im,8)
|
180 |
+
else:
|
181 |
+
in_im = cv2.resize(in_im,(MAX_SIZE,MAX_SIZE))
|
182 |
+
|
183 |
+
# normalize
|
184 |
+
in_im = in_im / 255.0
|
185 |
+
in_im = torch.from_numpy(in_im.transpose(2,0,1)).unsqueeze(0)
|
186 |
+
|
187 |
+
# inference
|
188 |
+
in_im = in_im.half().to(DEVICE)
|
189 |
+
model = model.half()
|
190 |
+
with torch.no_grad():
|
191 |
+
pred = model(in_im)
|
192 |
+
pred = torch.clamp(pred,0,1)
|
193 |
+
pred = pred[0].permute(1,2,0).cpu().numpy()
|
194 |
+
pred = (pred*255).astype(np.uint8)
|
195 |
+
|
196 |
+
if max(w,h) < MAX_SIZE:
|
197 |
+
out_im = pred[padding_h:,padding_w:]
|
198 |
+
else:
|
199 |
+
pred[pred==0]=1
|
200 |
+
shadow_map = cv2.resize(im_org,(MAX_SIZE,MAX_SIZE)).astype(float)/pred.astype(float)
|
201 |
+
shadow_map = cv2.resize(shadow_map,(w,h))
|
202 |
+
shadow_map[shadow_map==0]=0.00001
|
203 |
+
out_im = np.clip(im_org.astype(float)/shadow_map,0,255).astype(np.uint8)
|
204 |
+
|
205 |
+
return prompt[:,:,0],prompt[:,:,1],prompt[:,:,2],out_im
|
206 |
+
|
207 |
+
|
208 |
+
def deblurring(model,im_path):
|
209 |
+
# setup image
|
210 |
+
im_org = cv2.imread(im_path)
|
211 |
+
in_im,padding_h,padding_w = stride_integral(im_org,8)
|
212 |
+
prompt = deblur_prompt(in_im)
|
213 |
+
in_im = np.concatenate((in_im,prompt),-1)
|
214 |
+
in_im = in_im / 255.0
|
215 |
+
in_im = torch.from_numpy(in_im.transpose(2,0,1)).unsqueeze(0)
|
216 |
+
in_im = in_im.half().to(DEVICE)
|
217 |
+
# inference
|
218 |
+
model.to(DEVICE)
|
219 |
+
model.eval()
|
220 |
+
model = model.half()
|
221 |
+
with torch.no_grad():
|
222 |
+
pred = model(in_im)
|
223 |
+
pred = torch.clamp(pred,0,1)
|
224 |
+
pred = pred[0].permute(1,2,0).cpu().numpy()
|
225 |
+
pred = (pred*255).astype(np.uint8)
|
226 |
+
out_im = pred[padding_h:,padding_w:]
|
227 |
+
|
228 |
+
return prompt[:,:,0],prompt[:,:,1],prompt[:,:,2],out_im
|
229 |
+
|
230 |
+
|
231 |
+
|
232 |
+
def binarization(model,im_path):
|
233 |
+
im_org = cv2.imread(im_path)
|
234 |
+
im,padding_h,padding_w = stride_integral(im_org,8)
|
235 |
+
prompt = binarization_promptv2(im)
|
236 |
+
h,w = im.shape[:2]
|
237 |
+
in_im = np.concatenate((im,prompt),-1)
|
238 |
+
|
239 |
+
in_im = in_im / 255.0
|
240 |
+
in_im = torch.from_numpy(in_im.transpose(2,0,1)).unsqueeze(0)
|
241 |
+
in_im = in_im.to(DEVICE)
|
242 |
+
model = model.half()
|
243 |
+
in_im = in_im.half()
|
244 |
+
with torch.no_grad():
|
245 |
+
pred = model(in_im)
|
246 |
+
pred = pred[:,:2,:,:]
|
247 |
+
pred = torch.max(torch.softmax(pred,1),1)[1]
|
248 |
+
pred = pred[0].cpu().numpy()
|
249 |
+
pred = (pred*255).astype(np.uint8)
|
250 |
+
pred = cv2.resize(pred,(w,h))
|
251 |
+
out_im = pred[padding_h:,padding_w:]
|
252 |
+
|
253 |
+
return prompt[:,:,0],prompt[:,:,1],prompt[:,:,2],out_im
|
254 |
+
|
255 |
+
|
256 |
+
|
257 |
+
|
258 |
+
|
259 |
+
def get_args():
|
260 |
+
parser = argparse.ArgumentParser(description='Params')
|
261 |
+
parser.add_argument('--model_path', nargs='?', type=str, default='./checkpoints/docres.pkl',help='Path of the saved checkpoint')
|
262 |
+
parser.add_argument('--im_path', nargs='?', type=str, default='./distorted/',
|
263 |
+
help='Path of input document image')
|
264 |
+
parser.add_argument('--out_folder', nargs='?', type=str, default='./restorted/',
|
265 |
+
help='Folder of the output images')
|
266 |
+
parser.add_argument('--task', nargs='?', type=str, default='dewarping',
|
267 |
+
help='task that need to be executed')
|
268 |
+
parser.add_argument('--save_dtsprompt', nargs='?', type=int, default=0,
|
269 |
+
help='Width of the input image')
|
270 |
+
args = parser.parse_args()
|
271 |
+
possible_tasks = ['dewarping','deshadowing','appearance','deblurring','binarization','end2end']
|
272 |
+
assert args.task in possible_tasks, 'Unsupported task, task must be one of '+', '.join(possible_tasks)
|
273 |
+
return args
|
274 |
+
|
275 |
+
def model_init(args):
|
276 |
+
# prepare model
|
277 |
+
model = restormer_arch.Restormer(
|
278 |
+
inp_channels=6,
|
279 |
+
out_channels=3,
|
280 |
+
dim = 48,
|
281 |
+
num_blocks = [2,3,3,4],
|
282 |
+
num_refinement_blocks = 4,
|
283 |
+
heads = [1,2,4,8],
|
284 |
+
ffn_expansion_factor = 2.66,
|
285 |
+
bias = False,
|
286 |
+
LayerNorm_type = 'WithBias',
|
287 |
+
dual_pixel_task = True
|
288 |
+
)
|
289 |
+
|
290 |
+
if DEVICE.type == 'cpu':
|
291 |
+
state = convert_state_dict(torch.load(args.model_path, map_location='cpu')['model_state'])
|
292 |
+
else:
|
293 |
+
state = convert_state_dict(torch.load(args.model_path, map_location='cuda:0')['model_state'])
|
294 |
+
model.load_state_dict(state)
|
295 |
+
|
296 |
+
model.eval()
|
297 |
+
model = model.to(DEVICE)
|
298 |
+
return model
|
299 |
+
|
300 |
+
def inference_one_im(model,im_path,task):
|
301 |
+
if task=='dewarping':
|
302 |
+
prompt1,prompt2,prompt3,restorted = dewarping(model,im_path)
|
303 |
+
elif task=='deshadowing':
|
304 |
+
prompt1,prompt2,prompt3,restorted = deshadowing(model,im_path)
|
305 |
+
elif task=='appearance':
|
306 |
+
prompt1,prompt2,prompt3,restorted = appearance(model,im_path)
|
307 |
+
elif task=='deblurring':
|
308 |
+
prompt1,prompt2,prompt3,restorted = deblurring(model,im_path)
|
309 |
+
elif task=='binarization':
|
310 |
+
prompt1,prompt2,prompt3,restorted = binarization(model,im_path)
|
311 |
+
elif task=='end2end':
|
312 |
+
prompt1,prompt2,prompt3,restorted = dewarping(model,im_path)
|
313 |
+
cv2.imwrite('restorted/step1.jpg',restorted)
|
314 |
+
prompt1,prompt2,prompt3,restorted = deshadowing(model,'restorted/step1.jpg')
|
315 |
+
cv2.imwrite('restorted/step2.jpg',restorted)
|
316 |
+
prompt1,prompt2,prompt3,restorted = appearance(model,'restorted/step2.jpg')
|
317 |
+
# os.remove('restorted/step1.jpg')
|
318 |
+
# os.remove('restorted/step2.jpg')
|
319 |
+
|
320 |
+
return prompt1,prompt2,prompt3,restorted
|
321 |
+
|
322 |
+
|
323 |
+
|
324 |
+
if __name__ == '__main__':
|
325 |
+
## model init
|
326 |
+
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
327 |
+
args = get_args()
|
328 |
+
model = model_init(args)
|
329 |
+
|
330 |
+
## inference
|
331 |
+
prompt1,prompt2,prompt3,restorted = inference_one_im(model,args.im_path,args.task)
|
332 |
+
|
333 |
+
## results saving
|
334 |
+
im_name = os.path.split(args.im_path)[-1]
|
335 |
+
im_format = '.'+im_name.split('.')[-1]
|
336 |
+
save_path = os.path.join(args.out_folder,im_name.replace(im_format,'_'+args.task+im_format))
|
337 |
+
cv2.imwrite(save_path,restorted)
|
338 |
+
if args.save_dtsprompt:
|
339 |
+
cv2.imwrite(save_path.replace(im_format,'_prompt1'+im_format),prompt1)
|
340 |
+
cv2.imwrite(save_path.replace(im_format,'_prompt2'+im_format),prompt2)
|
341 |
+
cv2.imwrite(save_path.replace(im_format,'_prompt3'+im_format),prompt3)
|
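Besides the command line, the functions in `inference.py` can be driven from Python. Below is a minimal sketch: the `Args` stand-in, the device assignment, and the image path are assumptions for illustration, and the MBD and DocRes checkpoints still need to be in their expected locations.

```python
import cv2
import torch

import inference

# DEVICE is normally set in the __main__ block, so assign it before calling the helpers
inference.DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Args:  # stand-in for the argparse namespace consumed by model_init
    model_path = './checkpoints/docres.pkl'

model = inference.model_init(Args())
_, _, _, restored = inference.inference_one_im(model, 'input.png', 'deshadowing')
cv2.imwrite('restored.png', restored)
```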
loaders/docres_loader.py
ADDED
@@ -0,0 +1,558 @@
1 |
+
import os
|
2 |
+
from os.path import join as pjoin
|
3 |
+
import collections
|
4 |
+
import json
|
5 |
+
from numpy.lib.histograms import histogram_bin_edges
|
6 |
+
import torch
|
7 |
+
import numpy as np
|
8 |
+
import cv2
|
9 |
+
import random
|
10 |
+
import torch.nn.functional as F
|
11 |
+
from torch.utils import data
|
12 |
+
import glob
|
13 |
+
|
14 |
+
class DocResTrainDataset(data.Dataset):
|
15 |
+
def __init__(self, dataset={}, img_size=512,):
|
16 |
+
json_paths = dataset['json_paths']
|
17 |
+
self.task = dataset['task']
|
18 |
+
self.size = img_size
|
19 |
+
self.im_path = dataset['im_path']
|
20 |
+
|
21 |
+
self.datas = []
|
22 |
+
for json_path in json_paths:
|
23 |
+
with open(json_path,'r') as f:
|
24 |
+
data = json.load(f)
|
25 |
+
self.datas += data
|
26 |
+
|
27 |
+
self.background_paths = glob.glob('/data2/jiaxin/Training_Data/dewarping/doc_3d/background/*/*/*')
|
28 |
+
self.shadow_paths = glob.glob('/data2/jiaxin/Training_Data/illumination/doc3dshadow/new_shadow/*/*')
|
29 |
+
|
30 |
+
def __len__(self):
|
31 |
+
return len(self.datas)
|
32 |
+
|
33 |
+
def __getitem__(self, index):
|
34 |
+
data = self.datas[index]
|
35 |
+
in_im,gt_im,dtsprompt = self.data_processing(self.task,data)
|
36 |
+
|
37 |
+
return torch.cat((in_im,dtsprompt),0), gt_im
|
38 |
+
|
39 |
+
def data_processing(self,task,data):
|
40 |
+
|
41 |
+
if task=='deblurring':
|
42 |
+
## image prepare
|
43 |
+
in_im = cv2.imread(os.path.join(self.im_path,data['in_path']))
|
44 |
+
gt_im = cv2.imread(os.path.join(self.im_path,data['gt_path']))
|
45 |
+
dtsprompt = self.deblur_dtsprompt(in_im)
|
46 |
+
## get prompt
|
47 |
+
in_im, gt_im,dtsprompt = self.randomcrop([in_im,gt_im,dtsprompt])
|
48 |
+
in_im = self.rgbim_transform(in_im)
|
49 |
+
gt_im = self.rgbim_transform(gt_im)
|
50 |
+
dtsprompt = self.rgbim_transform(dtsprompt)
|
51 |
+
elif task =='dewarping':
|
52 |
+
## image prepare
|
53 |
+
in_im = cv2.imread(os.path.join(self.im_path,data['in_path']))
|
54 |
+
mask = cv2.imread(os.path.join(self.im_path,data['mask_path']))[:,:,0]
|
55 |
+
bm = np.load(os.path.join(self.im_path,data['gt_path'])).astype(np.float) #-> 0-448
|
56 |
+
bm = cv2.resize(bm,(448,448))
|
57 |
+
## add background
|
58 |
+
background = cv2.imread(random.choice(self.background_paths))
|
59 |
+
min_length = min(background.shape[:2])
|
60 |
+
crop_size = random.randint(int(min_length*0.5),min_length-1)
|
61 |
+
shift_y = np.random.randint(0,background.shape[1]-crop_size)
|
62 |
+
shift_x = np.random.randint(0,background.shape[0]-crop_size)
|
63 |
+
background = background[shift_x:shift_x+crop_size,shift_y:shift_y+crop_size,:]
|
64 |
+
background = cv2.resize(background,(448,448))
|
65 |
+
if np.mean(in_im[mask==0])<10:
|
66 |
+
in_im[mask==0]=background[mask==0]
|
67 |
+
## random crop and get prompt
|
68 |
+
in_im,mask,bm = self.random_margin_bm(in_im,mask,bm) # bm-> 0-1
|
69 |
+
in_im = cv2.resize(in_im,(self.size,self.size))
|
70 |
+
mask = cv2.resize(mask,(self.size,self.size))
|
71 |
+
mask_aug = self.mask_augment(mask)
|
72 |
+
in_im[mask_aug==0]=0
|
73 |
+
bm = cv2.resize(bm,(self.size,self.size)) # bm-> 0-1
|
74 |
+
bm_shift = (bm*self.size - self.getBasecoord(self.size,self.size))/self.size
|
75 |
+
base_coord = self.getBasecoord(self.size,self.size)/self.size
|
76 |
+
|
77 |
+
in_im = self.rgbim_transform(in_im)
|
78 |
+
base_coord = base_coord.transpose(2, 0, 1)
|
79 |
+
base_coord = torch.from_numpy(base_coord)
|
80 |
+
|
81 |
+
bm_shift = bm_shift.transpose(2, 0, 1)
|
82 |
+
bm_shift = torch.from_numpy(bm_shift)
|
83 |
+
|
84 |
+
mask[mask>155] = 255
|
85 |
+
mask[mask<=155] = 0
|
86 |
+
mask = mask/255
|
87 |
+
mask = np.expand_dims(mask,-1)
|
88 |
+
mask = mask.transpose(2, 0, 1)
|
89 |
+
mask = torch.from_numpy(mask)
|
90 |
+
|
91 |
+
mask_aug[mask_aug>155] = 255
|
92 |
+
mask_aug[mask_aug<=155] = 0
|
93 |
+
mask_aug = mask_aug/255
|
94 |
+
mask_aug = np.expand_dims(mask_aug,-1)
|
95 |
+
mask_aug = mask_aug.transpose(2, 0, 1)
|
96 |
+
mask_aug = torch.from_numpy(mask_aug)
|
97 |
+
|
98 |
+
in_im = in_im
|
99 |
+
gt_im = torch.cat((bm_shift,mask),0)
|
100 |
+
dtsprompt = torch.cat((base_coord,mask_aug),0)
|
101 |
+
|
102 |
+
elif task == 'binarization':
|
103 |
+
## image prepare
|
104 |
+
in_im = cv2.imread(os.path.join(self.im_path,data['in_path']))
|
105 |
+
gt_im = cv2.imread(os.path.join(self.im_path,data['gt_path']))
|
106 |
+
## get prompt
|
107 |
+
thr = cv2.imread(os.path.join(self.im_path,data['thr_path']))
|
108 |
+
bin_map = cv2.imread(os.path.join(self.im_path,data['bin_path']))
|
109 |
+
gradient = cv2.imread(os.path.join(self.im_path,data['gradient_path']))
|
110 |
+
bin_map[bin_map>155]=255
|
111 |
+
bin_map[bin_map<=155]=0
|
112 |
+
in_im, gt_im,thr,bin_map,gradient = self.randomcrop([in_im,gt_im,thr,bin_map,gradient])
|
113 |
+
in_im = self.randomAugment_binarization(in_im)
|
114 |
+
gt_im[gt_im>155]=255
|
115 |
+
gt_im[gt_im<=155]=0
|
116 |
+
gt_im = gt_im[:,:,0]
|
117 |
+
## transform
|
118 |
+
in_im = self.rgbim_transform(in_im)
|
119 |
+
thr = self.rgbim_transform(thr)
|
120 |
+
gradient = self.rgbim_transform(gradient)
|
121 |
+
bin_map = self.rgbim_transform(bin_map)
|
122 |
+
gt_im = gt_im.astype(np.float)/255.
|
123 |
+
gt_im = torch.from_numpy(gt_im)
|
124 |
+
gt_im = gt_im.unsqueeze(0)
|
125 |
+
dtsprompt = torch.cat((thr[0].unsqueeze(0),gradient[0].unsqueeze(0),bin_map[0].unsqueeze(0)),0)
|
126 |
+
elif task == 'deshadowing':
|
127 |
+
|
128 |
+
in_im = cv2.imread(os.path.join(self.im_path,data['in_path']))
|
129 |
+
gt_im = cv2.imread(os.path.join(self.im_path,data['gt_path']))
|
130 |
+
shadow_im = self.deshadow_dtsprompt(in_im)
|
131 |
+
if 'fsdsrd' in data['in_path']:
|
132 |
+
in_im = cv2.resize(in_im,(512,512))
|
133 |
+
gt_im = cv2.resize(gt_im,(512,512))
|
134 |
+
shadow_im = cv2.resize(shadow_im,(512,512))
|
135 |
+
in_im, gt_im,shadow_im = self.randomcrop([in_im,gt_im,shadow_im])
|
136 |
+
else:
|
137 |
+
in_im, gt_im,shadow_im = self.randomcrop([in_im,gt_im,shadow_im])
|
138 |
+
in_im = self.rgbim_transform(in_im)
|
139 |
+
gt_im = self.rgbim_transform(gt_im)
|
140 |
+
shadow_im = self.rgbim_transform(shadow_im)
|
141 |
+
dtsprompt = shadow_im
|
142 |
+
|
143 |
+
elif task == 'appearance':
|
144 |
+
if 'in_path' in data.keys():
|
145 |
+
cap_im = cv2.imread(os.path.join(self.im_path,data['in_path']))
|
146 |
+
gt_im = cv2.imread(os.path.join(self.im_path,data['gt_path']))
|
147 |
+
gt_im,cap_im = self.randomcrop_realdae(gt_im,cap_im)
|
148 |
+
cap_im = self.appearance_randomAugmentv1(cap_im)
|
149 |
+
enhance_result = self.appearance_dtsprompt(cap_im)
|
150 |
+
else:
|
151 |
+
gt_im = cv2.imread(os.path.join(self.im_path,data['gt_path']))
|
152 |
+
bleed_im = cv2.imread(os.path.join(self.im_path,random.choice(self.datas)['gt_path']))
|
153 |
+
bleed_im = cv2.resize(bleed_im,gt_im.shape[:2][::-1])
|
154 |
+
gt_im = self.randomcrop([gt_im])[0]
|
155 |
+
bleed_im = self.randomcrop([bleed_im])[0]
|
156 |
+
cap_im = self.bleed_trough(gt_im,bleed_im)
|
157 |
+
|
158 |
+
shadow_path = random.choice(self.shadow_paths)
|
159 |
+
shadow_im = cv2.imread(shadow_path)
|
160 |
+
cap_im = self.appearance_randomAugmentv2(cap_im,shadow_im)
|
161 |
+
enhance_result = self.appearance_dtsprompt(cap_im)
|
162 |
+
|
163 |
+
|
164 |
+
in_im = self.rgbim_transform(cap_im)
|
165 |
+
gt_im = self.rgbim_transform(gt_im)
|
166 |
+
dtsprompt = self.rgbim_transform(enhance_result)
|
167 |
+
|
168 |
+
return in_im, gt_im,dtsprompt
|
169 |
+
|
170 |
+
def randomcrop(self,im_list):
|
171 |
+
im_num = len(im_list)
|
172 |
+
## random scale rotate
|
173 |
+
if random.uniform(0,1) <= 0.8:
|
174 |
+
y,x = im_list[0].shape[:2]
|
175 |
+
angle = random.uniform(-180,180)
|
176 |
+
scale = random.uniform(0.7,1.5)
|
177 |
+
M = cv2.getRotationMatrix2D((int(x/2),int(y/2)),angle,scale)
|
178 |
+
for i in range(im_num):
|
179 |
+
im_list[i] = cv2.warpAffine(im_list[i],M,(x,y),borderValue=(255,255,255))
|
180 |
+
|
181 |
+
## random crop
|
182 |
+
crop_size = self.size
|
183 |
+
for i in range(im_num):
|
184 |
+
h,w = im_list[i].shape[:2]
|
185 |
+
h = max(h,crop_size)
|
186 |
+
w = max(w,crop_size)
|
187 |
+
im_list[i] = cv2.resize(im_list[i],(w,h))
|
188 |
+
|
189 |
+
if h==crop_size:
|
190 |
+
shift_y=0
|
191 |
+
else:
|
192 |
+
shift_y = np.random.randint(0,h-crop_size)
|
193 |
+
if w==crop_size:
|
194 |
+
shift_x=0
|
195 |
+
else:
|
196 |
+
shift_x = np.random.randint(0,w-crop_size)
|
197 |
+
for i in range(im_num):
|
198 |
+
im_list[i] = im_list[i][shift_y:shift_y+crop_size,shift_x:shift_x+crop_size,:]
|
199 |
+
return im_list
|
200 |
+
|
201 |
+
def deblur_dtsprompt(self,img):
|
202 |
+
x = cv2.Sobel(img,cv2.CV_16S,1,0)
|
203 |
+
y = cv2.Sobel(img,cv2.CV_16S,0,1)
|
204 |
+
absX = cv2.convertScaleAbs(x) # 转回uint8
|
205 |
+
absY = cv2.convertScaleAbs(y)
|
206 |
+
high_frequency = cv2.addWeighted(absX,0.5,absY,0.5,0)
|
207 |
+
high_frequency = cv2.cvtColor(high_frequency,cv2.COLOR_BGR2GRAY)
|
208 |
+
high_frequency = cv2.cvtColor(high_frequency,cv2.COLOR_GRAY2BGR)
|
209 |
+
return high_frequency
|
210 |
+
|
211 |
+
|
212 |
+
def appearance_dtsprompt(self,img):
|
213 |
+
h,w = img.shape[:2]
|
214 |
+
img = cv2.resize(img,(1024,1024))
|
215 |
+
rgb_planes = cv2.split(img)
|
216 |
+
result_planes = []
|
217 |
+
result_norm_planes = []
|
218 |
+
for plane in rgb_planes:
|
219 |
+
dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
|
220 |
+
bg_img = cv2.medianBlur(dilated_img, 21)
|
221 |
+
diff_img = 255 - cv2.absdiff(plane, bg_img)
|
222 |
+
norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
|
223 |
+
result_planes.append(diff_img)
|
224 |
+
result_norm_planes.append(norm_img)
|
225 |
+
result_norm = cv2.merge(result_norm_planes)
|
226 |
+
result_norm = cv2.resize(result_norm,(w,h))
|
227 |
+
return result_norm
|
228 |
+
|
229 |
+
|
230 |
+
def rgbim_transform(self,im):
|
231 |
+
im = im.astype(np.float)/255.
|
232 |
+
im = im.transpose(2, 0, 1)
|
233 |
+
im = torch.from_numpy(im)
|
234 |
+
return im
|
235 |
+
|
236 |
+
|
237 |
+
def random_margin_bm(self,in_im,msk,bm):
|
238 |
+
size = in_im.shape[:2]
|
239 |
+
[y, x] = (msk).nonzero()
|
240 |
+
minx = min(x)
|
241 |
+
maxx = max(x)
|
242 |
+
miny = min(y)
|
243 |
+
maxy = max(y)
|
244 |
+
|
245 |
+
s = 20
|
246 |
+
s = int(20*size[0]/128)
|
247 |
+
difference = int(5*size[0]/128)
|
248 |
+
cx1 = random.randint(0, s - difference)
|
249 |
+
cx2 = random.randint(0, s - difference) + 1
|
250 |
+
cy1 = random.randint(0, s - difference)
|
251 |
+
cy2 = random.randint(0, s - difference) + 1
|
252 |
+
|
253 |
+
t = miny-s+cy1
|
254 |
+
b = size[0]-maxy-s+cy2
|
255 |
+
l = minx-s+cx1
|
256 |
+
r = size[1]-maxx-s+cx2
|
257 |
+
|
258 |
+
t = max(0,t)
|
259 |
+
b = max(0,b)
|
260 |
+
l = max(0,l)
|
261 |
+
r = max(0,r)
|
262 |
+
|
263 |
+
in_im = in_im[t:size[0]-b,l:size[1]-r]
|
264 |
+
msk = msk[t:size[0]-b,l:size[1]-r]
|
265 |
+
bm[:,:,1]=bm[:,:,1]-t
|
266 |
+
bm[:,:,0]=bm[:,:,0]-l
|
267 |
+
bm=bm/np.array([448-l-r, 448-t-b])
|
268 |
+
|
269 |
+
return in_im,msk,bm
|
270 |
+
|
271 |
+
def mask_augment(self,mask):
|
272 |
+
if random.uniform(0,1) <= 0.6:
|
273 |
+
if random.uniform(0,1) <= 0.5:
|
274 |
+
mask = cv2.resize(mask,(64,64))
|
275 |
+
else:
|
276 |
+
mask = cv2.resize(mask,(128,128))
|
277 |
+
mask = cv2.resize(mask,(256,256))
|
278 |
+
mask[mask>155] = 255
|
279 |
+
mask[mask<=155] = 0
|
280 |
+
return mask
|
281 |
+
|
282 |
+
def bleed_trough(self, in_im, bleed_im):
|
283 |
+
if random.uniform(0,1) <= 0.5:
|
284 |
+
if random.uniform(0,1) <= 0.8:
|
285 |
+
ksize = np.random.randint(1,2)*2 + 1
|
286 |
+
bleed_im = cv2.blur(bleed_im,(ksize,ksize))
|
287 |
+
bleed_im = cv2.flip(bleed_im,1)
|
288 |
+
alpha = random.uniform(0.75,1)
|
289 |
+
in_im = cv2.addWeighted(in_im,alpha,bleed_im,1-alpha,0)
|
290 |
+
return in_im
|
291 |
+
|
292 |
+
def getBasecoord(self,h,w):
|
293 |
+
base_coord0 = np.tile(np.arange(h).reshape(h,1),(1,w)).astype(np.float32)
|
294 |
+
base_coord1 = np.tile(np.arange(w).reshape(1,w),(h,1)).astype(np.float32)
|
295 |
+
base_coord = np.concatenate((np.expand_dims(base_coord1,-1),np.expand_dims(base_coord0,-1)),-1)
|
296 |
+
return base_coord
|
297 |
+
|
298 |
+
|
299 |
+
def randomcrop_realdae(self,gt_im,cap_im):
|
300 |
+
if random.uniform(0,1) <= 0.5:
|
301 |
+
y,x = gt_im.shape[:2]
|
302 |
+
angle = random.uniform(-30,30)
|
303 |
+
scale = random.uniform(0.8,1.5)
|
304 |
+
M = cv2.getRotationMatrix2D((int(x/2),int(y/2)),angle,scale)
|
305 |
+
gt_im = cv2.warpAffine(gt_im,M,(x,y),borderValue=(255,255,255))
|
306 |
+
cap_im = cv2.warpAffine(cap_im,M,(x,y),borderValue=(255,255,255))
|
307 |
+
crop_size = self.size
|
308 |
+
if gt_im.shape[0] <= crop_size:
|
309 |
+
gt_im = cv2.copyMakeBorder(gt_im,crop_size-gt_im.shape[0]+1,0,0,0,borderType=cv2.BORDER_CONSTANT,value=(255,255,255))
|
310 |
+
cap_im = cv2.copyMakeBorder(cap_im,crop_size-cap_im.shape[0]+1,0,0,0,borderType=cv2.BORDER_CONSTANT,value=(255,255,255))
|
311 |
+
if gt_im.shape[1] <= crop_size:
|
312 |
+
gt_im = cv2.copyMakeBorder(gt_im,0,0,crop_size-gt_im.shape[1]+1,0,borderType=cv2.BORDER_CONSTANT,value=(255,255,255))
|
313 |
+
cap_im = cv2.copyMakeBorder(cap_im,0,0,crop_size-cap_im.shape[1]+1,0,borderType=cv2.BORDER_CONSTANT,value=(255,255,255))
|
314 |
+
shift_y = np.random.randint(0,gt_im.shape[1]-crop_size)
|
315 |
+
shift_x = np.random.randint(0,gt_im.shape[0]-crop_size)
|
316 |
+
gt_im = gt_im[shift_x:shift_x+crop_size,shift_y:shift_y+crop_size,:]
|
317 |
+
cap_im = cap_im[shift_x:shift_x+crop_size,shift_y:shift_y+crop_size,:]
|
318 |
+
return gt_im,cap_im
|
319 |
+
|
320 |
+
|
321 |
+
def randomAugment_binarization(self,in_img):
|
322 |
+
h,w = in_img.shape[:2]
|
323 |
+
## brightness
|
324 |
+
if random.uniform(0,1) <= 0.5:
|
325 |
+
high = 1.3
|
326 |
+
low = 0.8
|
327 |
+
ratio = np.random.uniform(low,high)
|
328 |
+
in_img = in_img.astype(np.float64)*ratio
|
329 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
330 |
+
## contrast
|
331 |
+
if random.uniform(0,1) <= 0.5:
|
332 |
+
high = 1.3
|
333 |
+
low = 0.8
|
334 |
+
ratio = np.random.uniform(low,high)
|
335 |
+
gray = cv2.cvtColor(in_img,cv2.COLOR_BGR2GRAY)
|
336 |
+
mean = np.mean(gray)
|
337 |
+
mean_array = np.ones_like(in_img).astype(np.float64)*mean
|
338 |
+
in_img = in_img.astype(np.float64)*ratio + mean_array*(1-ratio)
|
339 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
340 |
+
## color
|
341 |
+
if random.uniform(0,1) <= 0.5:
|
342 |
+
high = 0.2
|
343 |
+
low = 0.1
|
344 |
+
ratio = np.random.uniform(0.1,0.3)
|
345 |
+
random_color = np.random.randint(50,200,3).reshape(1,1,3)
|
346 |
+
random_color = (random_color*ratio).astype(np.uint8)
|
347 |
+
random_color = np.tile(random_color,(self.size,self.size,1))
|
348 |
+
in_img = in_img.astype(np.float64)*(1-ratio) + random_color
|
349 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
350 |
+
return in_img
|
351 |
+
|
352 |
+
|
353 |
+
def deshadow_dtsprompt(self,img):
|
354 |
+
h,w = img.shape[:2]
|
355 |
+
img = cv2.resize(img,(1024,1024))
|
356 |
+
rgb_planes = cv2.split(img)
|
357 |
+
result_planes = []
|
358 |
+
result_norm_planes = []
|
359 |
+
bg_imgs = []
|
360 |
+
for plane in rgb_planes:
|
361 |
+
dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
|
362 |
+
bg_img = cv2.medianBlur(dilated_img, 21)
|
363 |
+
bg_imgs.append(bg_img)
|
364 |
+
diff_img = 255 - cv2.absdiff(plane, bg_img)
|
365 |
+
norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
|
366 |
+
result_planes.append(diff_img)
|
367 |
+
result_norm_planes.append(norm_img)
|
368 |
+
result_norm = cv2.merge(result_norm_planes)
|
369 |
+
bg_imgs = cv2.merge(bg_imgs)
|
370 |
+
bg_imgs = cv2.resize(bg_imgs,(w,h))
|
371 |
+
return bg_imgs
|
372 |
+
|
373 |
+
|
374 |
+
|
375 |
+
|
376 |
+
|
377 |
+
|
378 |
+
|
379 |
+
|
380 |
+
|
381 |
+
def randomAugment(self,in_img,gt_img,shadow_img):
|
382 |
+
h,w = in_img.shape[:2]
|
383 |
+
# random crop
|
384 |
+
crop_size = random.randint(128,1024)
|
385 |
+
if shadow_img.shape[0] <= crop_size:
|
386 |
+
shadow_img = cv2.copyMakeBorder(shadow_img,crop_size-shadow_img.shape[0]+1,0,0,0,borderType=cv2.BORDER_CONSTANT,value=(128,128,128))
|
387 |
+
if shadow_img.shape[1] <= crop_size:
|
388 |
+
shadow_img = cv2.copyMakeBorder(shadow_img,0,0,crop_size-shadow_img.shape[1]+1,0,borderType=cv2.BORDER_CONSTANT,value=(128,128,128))
|
389 |
+
|
390 |
+
shift_y = np.random.randint(0,shadow_img.shape[1]-crop_size)
|
391 |
+
shift_x = np.random.randint(0,shadow_img.shape[0]-crop_size)
|
392 |
+
shadow_img = shadow_img[shift_x:shift_x+crop_size,shift_y:shift_y+crop_size,:]
|
393 |
+
shadow_img = cv2.resize(shadow_img,(w,h))
|
394 |
+
in_img = in_img.astype(np.float64)*(shadow_img.astype(np.float64)+1)/255
|
395 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
396 |
+
|
397 |
+
## brightness
|
398 |
+
if random.uniform(0,1) <= 0.5:
|
399 |
+
high = 1.3
|
400 |
+
low = 0.8
|
401 |
+
ratio = np.random.uniform(low,high)
|
402 |
+
in_img = in_img.astype(np.float64)*ratio
|
403 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
404 |
+
## contrast
|
405 |
+
if random.uniform(0,1) <= 0.5:
|
406 |
+
high = 1.3
|
407 |
+
low = 0.8
|
408 |
+
ratio = np.random.uniform(low,high)
|
409 |
+
gray = cv2.cvtColor(in_img,cv2.COLOR_BGR2GRAY)
|
410 |
+
mean = np.mean(gray)
|
411 |
+
mean_array = np.ones_like(in_img).astype(np.float64)*mean
|
412 |
+
in_img = in_img.astype(np.float64)*ratio + mean_array*(1-ratio)
|
413 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
414 |
+
## color
|
415 |
+
if random.uniform(0,1) <= 0.5:
|
416 |
+
high = 0.2
|
417 |
+
low = 0.1
|
418 |
+
ratio = np.random.uniform(0.1,0.3)
|
419 |
+
random_color = np.random.randint(50,200,3).reshape(1,1,3)
|
420 |
+
random_color = (random_color*ratio).astype(np.uint8)
|
421 |
+
random_color = np.tile(random_color,(self.img_size[0],self.img_size[1],1))
|
422 |
+
in_img = in_img.astype(np.float64)*(1-ratio) + random_color
|
423 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
424 |
+
## scale and rotate
|
425 |
+
if random.uniform(0,1) <= 0:
|
426 |
+
y,x = self.img_size
|
427 |
+
angle = random.uniform(-180,180)
|
428 |
+
scale = random.uniform(0.5,1.5)
|
429 |
+
M = cv2.getRotationMatrix2D((int(x/2),int(y/2)),angle,scale)
|
430 |
+
in_img = cv2.warpAffine(in_img,M,(x,y),borderValue=0)
|
431 |
+
gt_img = cv2.warpAffine(gt_img,M,(x,y),borderValue=0)
|
432 |
+
# add noise
|
433 |
+
## jpegcompression
|
434 |
+
quality_high = 95
|
435 |
+
quality_low = 45
|
436 |
+
quality = int(np.random.randint(quality_low,quality_high))
|
437 |
+
encode_param = [int(cv2.IMWRITE_JPEG_QUALITY),quality]
|
438 |
+
result, encimg = cv2.imencode('.jpg',in_img,encode_param)
|
439 |
+
in_img = cv2.imdecode(encimg,1).astype(np.uint8)
|
440 |
+
## gaussiannoise
|
441 |
+
mean = 0
|
442 |
+
sigma = 0.02
|
443 |
+
noise_ratio = 0.004
|
444 |
+
num_noise = int(np.ceil(noise_ratio*w))
|
445 |
+
coords = [np.random.randint(0,i-1,int(num_noise)) for i in [h,w]]
|
446 |
+
gauss = np.random.normal(mean,sigma,num_noise*3)*255
|
447 |
+
gauss = np.reshape(gauss,(-1,3))
|
448 |
+
in_img = in_img.astype(np.float64)
|
449 |
+
in_img[tuple(coords)] += gauss
|
450 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
451 |
+
## blur
|
452 |
+
ksize = np.random.randint(1,2)*2 + 1
|
453 |
+
in_img = cv2.blur(in_img,(ksize,ksize))
|
454 |
+
|
455 |
+
## erase
|
456 |
+
if random.uniform(0,1) <= 0.7:
|
457 |
+
for i in range(100):
|
458 |
+
area = int(np.random.uniform(0.01,0.05)*h*w)
|
459 |
+
ration = np.random.uniform(0.3,1/0.3)
|
460 |
+
h_shift = int(np.sqrt(area*ration))
|
461 |
+
w_shift = int(np.sqrt(area/ration))
|
462 |
+
if (h_shift<h) and (w_shift<w):
|
463 |
+
break
|
464 |
+
h_start = np.random.randint(0,h-h_shift)
|
465 |
+
w_start = np.random.randint(0,w-w_shift)
|
466 |
+
randm_area = np.random.randint(low=0,high=255,size=(h_shift,w_shift,3))
|
467 |
+
in_img[h_start:h_start+h_shift,w_start:w_start+w_shift,:] = randm_area
|
468 |
+
|
469 |
+
|
470 |
+
return in_img, gt_img
|
471 |
+
|
472 |
+
|
473 |
+
def appearance_randomAugmentv1(self,in_img):
|
474 |
+
|
475 |
+
## brightness
|
476 |
+
if random.uniform(0,1) <= 0.8:
|
477 |
+
high = 1.3
|
478 |
+
low = 0.5
|
479 |
+
ratio = np.random.uniform(low,high)
|
480 |
+
in_img = in_img.astype(np.float64)*ratio
|
481 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
482 |
+
## contrast
|
483 |
+
if random.uniform(0,1) <= 0.8:
|
484 |
+
high = 1.3
|
485 |
+
low = 0.5
|
486 |
+
ratio = np.random.uniform(low,high)
|
487 |
+
gray = cv2.cvtColor(in_img,cv2.COLOR_BGR2GRAY)
|
488 |
+
mean = np.mean(gray)
|
489 |
+
mean_array = np.ones_like(in_img).astype(np.float64)*mean
|
490 |
+
in_img = in_img.astype(np.float64)*ratio + mean_array*(1-ratio)
|
491 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
492 |
+
## color
|
493 |
+
if random.uniform(0,1) <= 0.8:
|
494 |
+
high = 0.2
|
495 |
+
low = 0.1
|
496 |
+
ratio = np.random.uniform(0.1,0.3)
|
497 |
+
random_color = np.random.randint(50,200,3).reshape(1,1,3)
|
498 |
+
random_color = (random_color*ratio).astype(np.uint8)
|
499 |
+
random_color = np.tile(random_color,(self.size,self.size,1))
|
500 |
+
in_img = in_img.astype(np.float64)*(1-ratio) + random_color
|
501 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
502 |
+
|
503 |
+
return in_img
|
504 |
+
|
505 |
+
|
506 |
+
def appearance_randomAugmentv2(self,in_img,shadow_img):
|
507 |
+
h,w = in_img.shape[:2]
|
508 |
+
# random crop
|
509 |
+
crop_size = random.randint(96,1024)
|
510 |
+
if shadow_img.shape[0] <= crop_size:
|
511 |
+
shadow_img = cv2.resize(shadow_img,(crop_size+1,crop_size+1))
|
512 |
+
if shadow_img.shape[1] <= crop_size:
|
513 |
+
shadow_img = cv2.resize(shadow_img,(crop_size+1,crop_size+1))
|
514 |
+
|
515 |
+
shift_y = np.random.randint(0,shadow_img.shape[1]-crop_size)
|
516 |
+
shift_x = np.random.randint(0,shadow_img.shape[0]-crop_size)
|
517 |
+
shadow_img = shadow_img[shift_x:shift_x+crop_size,shift_y:shift_y+crop_size,:]
|
518 |
+
shadow_img = cv2.resize(shadow_img,(w,h))
|
519 |
+
in_img = in_img.astype(np.float64)*(shadow_img.astype(np.float64)+1)/255
|
520 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
521 |
+
|
522 |
+
## brightness
|
523 |
+
if random.uniform(0,1) <= 0.8:
|
524 |
+
high = 1.3
|
525 |
+
low = 0.5
|
526 |
+
ratio = np.random.uniform(low,high)
|
527 |
+
in_img = in_img.astype(np.float64)*ratio
|
528 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
529 |
+
## contrast
|
530 |
+
if random.uniform(0,1) <= 0.8:
|
531 |
+
high = 1.3
|
532 |
+
low = 0.5
|
533 |
+
ratio = np.random.uniform(low,high)
|
534 |
+
gray = cv2.cvtColor(in_img,cv2.COLOR_BGR2GRAY)
|
535 |
+
mean = np.mean(gray)
|
536 |
+
mean_array = np.ones_like(in_img).astype(np.float64)*mean
|
537 |
+
in_img = in_img.astype(np.float64)*ratio + mean_array*(1-ratio)
|
538 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
539 |
+
## color
|
540 |
+
if random.uniform(0,1) <= 0.8:
|
541 |
+
high = 0.2
|
542 |
+
low = 0.1
|
543 |
+
ratio = np.random.uniform(0.1,0.3)
|
544 |
+
random_color = np.random.randint(50,200,3).reshape(1,1,3)
|
545 |
+
random_color = (random_color*ratio).astype(np.uint8)
|
546 |
+
random_color = np.tile(random_color,(h,w,1))
|
547 |
+
in_img = in_img.astype(np.float64)*(1-ratio) + random_color
|
548 |
+
in_img = np.clip(in_img,0,255).astype(np.uint8)
|
549 |
+
|
550 |
+
if random.uniform(0,1) <= 0.8:
|
551 |
+
quality_high = 95
|
552 |
+
quality_low = 45
|
553 |
+
quality = int(np.random.randint(quality_low,quality_high))
|
554 |
+
encode_param = [int(cv2.IMWRITE_JPEG_QUALITY),quality]
|
555 |
+
result, encimg = cv2.imencode('.jpg',in_img,encode_param)
|
556 |
+
in_img = cv2.imdecode(encimg,1).astype(np.uint8)
|
557 |
+
|
558 |
+
return in_img
|
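The loader above derives a 3-channel DTSPrompt per task (background estimate, shadow map, or high-frequency map) and returns it alongside the RGB crop. Below is a minimal sketch of how such a prompt could be stacked with the image to form the 6-channel input that the Restormer configuration in train.py (`inp_channels=6`) expects; the concatenation step and the names `dataset` and `input.png` are illustrative assumptions, not code from the loader.

```python
# Illustrative sketch only: stack an RGB crop with its appearance DTSPrompt.
# `dataset` is assumed to be a DocResTrainDataset instance; 'input.png' is a placeholder path.
import cv2
import torch

img = cv2.imread('input.png')                     # HxWx3 uint8 document crop
prompt = dataset.appearance_dtsprompt(img)        # HxWx3 background estimate
six_channel = torch.cat([dataset.rgbim_transform(img),
                         dataset.rgbim_transform(prompt)], dim=0)  # 6xHxW float tensor
```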
models/restormer_arch.py
ADDED
@@ -0,0 +1,308 @@
1 |
+
## Restormer: Efficient Transformer for High-Resolution Image Restoration
|
2 |
+
## Syed Waqas Zamir, Aditya Arora, Salman Khan, Munawar Hayat, Fahad Shahbaz Khan, and Ming-Hsuan Yang
|
3 |
+
## https://arxiv.org/abs/2111.09881
|
4 |
+
|
5 |
+
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
from pdb import set_trace as stx
|
10 |
+
import numbers
|
11 |
+
|
12 |
+
from einops import rearrange
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
+
##########################################################################
|
17 |
+
## Layer Norm
|
18 |
+
|
19 |
+
def to_3d(x):
|
20 |
+
return rearrange(x, 'b c h w -> b (h w) c')
|
21 |
+
|
22 |
+
def to_4d(x,h,w):
|
23 |
+
return rearrange(x, 'b (h w) c -> b c h w',h=h,w=w)
|
24 |
+
|
25 |
+
class BiasFree_LayerNorm(nn.Module):
|
26 |
+
def __init__(self, normalized_shape):
|
27 |
+
super(BiasFree_LayerNorm, self).__init__()
|
28 |
+
if isinstance(normalized_shape, numbers.Integral):
|
29 |
+
normalized_shape = (normalized_shape,)
|
30 |
+
normalized_shape = torch.Size(normalized_shape)
|
31 |
+
|
32 |
+
assert len(normalized_shape) == 1
|
33 |
+
|
34 |
+
self.weight = nn.Parameter(torch.ones(normalized_shape))
|
35 |
+
self.normalized_shape = normalized_shape
|
36 |
+
|
37 |
+
def forward(self, x):
|
38 |
+
sigma = x.var(-1, keepdim=True, unbiased=False)
|
39 |
+
return x / torch.sqrt(sigma+1e-5) * self.weight
|
40 |
+
|
41 |
+
class WithBias_LayerNorm(nn.Module):
|
42 |
+
def __init__(self, normalized_shape):
|
43 |
+
super(WithBias_LayerNorm, self).__init__()
|
44 |
+
if isinstance(normalized_shape, numbers.Integral):
|
45 |
+
normalized_shape = (normalized_shape,)
|
46 |
+
normalized_shape = torch.Size(normalized_shape)
|
47 |
+
|
48 |
+
assert len(normalized_shape) == 1
|
49 |
+
|
50 |
+
self.weight = nn.Parameter(torch.ones(normalized_shape))
|
51 |
+
self.bias = nn.Parameter(torch.zeros(normalized_shape))
|
52 |
+
self.normalized_shape = normalized_shape
|
53 |
+
|
54 |
+
def forward(self, x):
|
55 |
+
mu = x.mean(-1, keepdim=True)
|
56 |
+
sigma = x.var(-1, keepdim=True, unbiased=False)
|
57 |
+
return (x - mu) / torch.sqrt(sigma+1e-5) * self.weight + self.bias
|
58 |
+
|
59 |
+
|
60 |
+
class LayerNorm(nn.Module):
|
61 |
+
def __init__(self, dim, LayerNorm_type):
|
62 |
+
super(LayerNorm, self).__init__()
|
63 |
+
if LayerNorm_type =='BiasFree':
|
64 |
+
self.body = BiasFree_LayerNorm(dim)
|
65 |
+
else:
|
66 |
+
self.body = WithBias_LayerNorm(dim)
|
67 |
+
|
68 |
+
def forward(self, x):
|
69 |
+
h, w = x.shape[-2:]
|
70 |
+
return to_4d(self.body(to_3d(x)), h, w)
|
71 |
+
|
72 |
+
|
73 |
+
|
74 |
+
##########################################################################
|
75 |
+
## Gated-Dconv Feed-Forward Network (GDFN)
|
76 |
+
class FeedForward(nn.Module):
|
77 |
+
def __init__(self, dim, ffn_expansion_factor, bias):
|
78 |
+
super(FeedForward, self).__init__()
|
79 |
+
|
80 |
+
hidden_features = int(dim*ffn_expansion_factor)
|
81 |
+
|
82 |
+
self.project_in = nn.Conv2d(dim, hidden_features*2, kernel_size=1, bias=bias)
|
83 |
+
|
84 |
+
self.dwconv = nn.Conv2d(hidden_features*2, hidden_features*2, kernel_size=3, stride=1, padding=1, groups=hidden_features*2, bias=bias)
|
85 |
+
|
86 |
+
self.project_out = nn.Conv2d(hidden_features, dim, kernel_size=1, bias=bias)
|
87 |
+
|
88 |
+
def forward(self, x):
|
89 |
+
x = self.project_in(x)
|
90 |
+
x1, x2 = self.dwconv(x).chunk(2, dim=1)
|
91 |
+
x = F.gelu(x1) * x2
|
92 |
+
x = self.project_out(x)
|
93 |
+
return x
|
94 |
+
|
95 |
+
|
96 |
+
|
97 |
+
##########################################################################
|
98 |
+
## Multi-DConv Head Transposed Self-Attention (MDTA)
|
99 |
+
class Attention(nn.Module):
|
100 |
+
def __init__(self, dim, num_heads, bias):
|
101 |
+
super(Attention, self).__init__()
|
102 |
+
self.num_heads = num_heads
|
103 |
+
self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1))
|
104 |
+
|
105 |
+
self.qkv = nn.Conv2d(dim, dim*3, kernel_size=1, bias=bias)
|
106 |
+
self.qkv_dwconv = nn.Conv2d(dim*3, dim*3, kernel_size=3, stride=1, padding=1, groups=dim*3, bias=bias)
|
107 |
+
self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias)
|
108 |
+
|
109 |
+
|
110 |
+
|
111 |
+
def forward(self, x):
|
112 |
+
b,c,h,w = x.shape
|
113 |
+
|
114 |
+
qkv = self.qkv_dwconv(self.qkv(x))
|
115 |
+
q,k,v = qkv.chunk(3, dim=1)
|
116 |
+
|
117 |
+
q = rearrange(q, 'b (head c) h w -> b head c (h w)', head=self.num_heads)
|
118 |
+
k = rearrange(k, 'b (head c) h w -> b head c (h w)', head=self.num_heads)
|
119 |
+
v = rearrange(v, 'b (head c) h w -> b head c (h w)', head=self.num_heads)
|
120 |
+
|
121 |
+
q = torch.nn.functional.normalize(q, dim=-1)
|
122 |
+
k = torch.nn.functional.normalize(k, dim=-1)
|
123 |
+
|
124 |
+
attn = (q @ k.transpose(-2, -1)) * self.temperature
|
125 |
+
attn = attn.softmax(dim=-1)
|
126 |
+
|
127 |
+
out = (attn @ v)
|
128 |
+
|
129 |
+
out = rearrange(out, 'b head c (h w) -> b (head c) h w', head=self.num_heads, h=h, w=w)
|
130 |
+
|
131 |
+
out = self.project_out(out)
|
132 |
+
return out
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
##########################################################################
|
137 |
+
class TransformerBlock(nn.Module):
|
138 |
+
def __init__(self, dim, num_heads, ffn_expansion_factor, bias, LayerNorm_type):
|
139 |
+
super(TransformerBlock, self).__init__()
|
140 |
+
|
141 |
+
self.norm1 = LayerNorm(dim, LayerNorm_type)
|
142 |
+
self.attn = Attention(dim, num_heads, bias)
|
143 |
+
self.norm2 = LayerNorm(dim, LayerNorm_type)
|
144 |
+
self.ffn = FeedForward(dim, ffn_expansion_factor, bias)
|
145 |
+
|
146 |
+
def forward(self, x):
|
147 |
+
x = x + self.attn(self.norm1(x))
|
148 |
+
x = x + self.ffn(self.norm2(x))
|
149 |
+
|
150 |
+
return x
|
151 |
+
|
152 |
+
|
153 |
+
|
154 |
+
##########################################################################
|
155 |
+
## Overlapped image patch embedding with 3x3 Conv
|
156 |
+
class OverlapPatchEmbed(nn.Module):
|
157 |
+
def __init__(self, in_c=3, embed_dim=48, bias=False):
|
158 |
+
super(OverlapPatchEmbed, self).__init__()
|
159 |
+
|
160 |
+
self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=3, stride=1, padding=1, bias=bias)
|
161 |
+
|
162 |
+
def forward(self, x):
|
163 |
+
x = self.proj(x)
|
164 |
+
|
165 |
+
return x
|
166 |
+
|
167 |
+
|
168 |
+
|
169 |
+
##########################################################################
|
170 |
+
## Resizing modules
|
171 |
+
class Downsample(nn.Module):
|
172 |
+
def __init__(self, n_feat):
|
173 |
+
super(Downsample, self).__init__()
|
174 |
+
|
175 |
+
self.body = nn.Sequential(nn.Conv2d(n_feat, n_feat//2, kernel_size=3, stride=1, padding=1, bias=False),
|
176 |
+
nn.PixelUnshuffle(2))
|
177 |
+
|
178 |
+
def forward(self, x):
|
179 |
+
return self.body(x)
|
180 |
+
|
181 |
+
class Upsample(nn.Module):
|
182 |
+
def __init__(self, n_feat):
|
183 |
+
super(Upsample, self).__init__()
|
184 |
+
|
185 |
+
self.body = nn.Sequential(nn.Conv2d(n_feat, n_feat*2, kernel_size=3, stride=1, padding=1, bias=False),
|
186 |
+
nn.PixelShuffle(2))
|
187 |
+
|
188 |
+
def forward(self, x):
|
189 |
+
return self.body(x)
|
190 |
+
|
191 |
+
##########################################################################
|
192 |
+
##---------- Restormer -----------------------
|
193 |
+
class Restormer(nn.Module):
|
194 |
+
def __init__(self,
|
195 |
+
inp_channels=3,
|
196 |
+
out_channels=3,
|
197 |
+
dim = 48,
|
198 |
+
num_blocks = [4,6,6,8],
|
199 |
+
num_refinement_blocks = 4,
|
200 |
+
heads = [1,2,4,8],
|
201 |
+
ffn_expansion_factor = 2.66,
|
202 |
+
bias = False,
|
203 |
+
LayerNorm_type = 'WithBias', ## Other option 'BiasFree'
|
204 |
+
dual_pixel_task = True ## True for dual-pixel defocus deblurring only. Also set inp_channels=6
|
205 |
+
):
|
206 |
+
|
207 |
+
super(Restormer, self).__init__()
|
208 |
+
|
209 |
+
self.patch_embed = OverlapPatchEmbed(inp_channels, dim)
|
210 |
+
|
211 |
+
self.encoder_level1 = nn.Sequential(*[TransformerBlock(dim=dim, num_heads=heads[0], ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0])])
|
212 |
+
|
213 |
+
self.down1_2 = Downsample(dim) ## From Level 1 to Level 2
|
214 |
+
self.encoder_level2 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**1), num_heads=heads[1], ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[1])])
|
215 |
+
|
216 |
+
self.down2_3 = Downsample(int(dim*2**1)) ## From Level 2 to Level 3
|
217 |
+
self.encoder_level3 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**2), num_heads=heads[2], ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[2])])
|
218 |
+
|
219 |
+
self.down3_4 = Downsample(int(dim*2**2)) ## From Level 3 to Level 4
|
220 |
+
self.latent = nn.Sequential(*[TransformerBlock(dim=int(dim*2**3), num_heads=heads[3], ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[3])])
|
221 |
+
|
222 |
+
self.up4_3 = Upsample(int(dim*2**3)) ## From Level 4 to Level 3
|
223 |
+
self.reduce_chan_level3 = nn.Conv2d(int(dim*2**3), int(dim*2**2), kernel_size=1, bias=bias)
|
224 |
+
self.decoder_level3 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**2), num_heads=heads[2], ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[2])])
|
225 |
+
|
226 |
+
|
227 |
+
self.up3_2 = Upsample(int(dim*2**2)) ## From Level 3 to Level 2
|
228 |
+
self.reduce_chan_level2 = nn.Conv2d(int(dim*2**2), int(dim*2**1), kernel_size=1, bias=bias)
|
229 |
+
self.decoder_level2 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**1), num_heads=heads[1], ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[1])])
|
230 |
+
|
231 |
+
self.up2_1 = Upsample(int(dim*2**1)) ## From Level 2 to Level 1 (NO 1x1 conv to reduce channels)
|
232 |
+
|
233 |
+
self.decoder_level1 = nn.Sequential(*[TransformerBlock(dim=int(dim*2**1), num_heads=heads[0], ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0])])
|
234 |
+
|
235 |
+
self.refinement = nn.Sequential(*[TransformerBlock(dim=int(dim*2**1), num_heads=heads[0], ffn_expansion_factor=ffn_expansion_factor, bias=bias, LayerNorm_type=LayerNorm_type) for i in range(num_refinement_blocks)])
|
236 |
+
|
237 |
+
#### For Dual-Pixel Defocus Deblurring Task ####
|
238 |
+
self.dual_pixel_task = dual_pixel_task
|
239 |
+
if self.dual_pixel_task:
|
240 |
+
self.skip_conv = nn.Conv2d(dim, int(dim*2**1), kernel_size=1, bias=bias)
|
241 |
+
###########################
|
242 |
+
|
243 |
+
|
244 |
+
self.output = nn.Conv2d(int(dim*2**1), out_channels, kernel_size=3, stride=1, padding=1, bias=bias)
|
245 |
+
|
246 |
+
def forward(self, inp_img,task=''):
|
247 |
+
|
248 |
+
inp_enc_level1 = self.patch_embed(inp_img)
|
249 |
+
out_enc_level1 = self.encoder_level1(inp_enc_level1)
|
250 |
+
|
251 |
+
inp_enc_level2 = self.down1_2(out_enc_level1)
|
252 |
+
out_enc_level2 = self.encoder_level2(inp_enc_level2)
|
253 |
+
|
254 |
+
inp_enc_level3 = self.down2_3(out_enc_level2)
|
255 |
+
out_enc_level3 = self.encoder_level3(inp_enc_level3)
|
256 |
+
|
257 |
+
inp_enc_level4 = self.down3_4(out_enc_level3)
|
258 |
+
latent = self.latent(inp_enc_level4)
|
259 |
+
|
260 |
+
|
261 |
+
inp_dec_level3 = self.up4_3(latent)
|
262 |
+
inp_dec_level3 = torch.cat([inp_dec_level3, out_enc_level3], 1)
|
263 |
+
inp_dec_level3 = self.reduce_chan_level3(inp_dec_level3)
|
264 |
+
out_dec_level3 = self.decoder_level3(inp_dec_level3)
|
265 |
+
|
266 |
+
inp_dec_level2 = self.up3_2(out_dec_level3)
|
267 |
+
inp_dec_level2 = torch.cat([inp_dec_level2, out_enc_level2], 1)
|
268 |
+
inp_dec_level2 = self.reduce_chan_level2(inp_dec_level2)
|
269 |
+
out_dec_level2 = self.decoder_level2(inp_dec_level2)
|
270 |
+
|
271 |
+
inp_dec_level1 = self.up2_1(out_dec_level2)
|
272 |
+
inp_dec_level1 = torch.cat([inp_dec_level1, out_enc_level1], 1)
|
273 |
+
out_dec_level1 = self.decoder_level1(inp_dec_level1)
|
274 |
+
|
275 |
+
out_dec_level1 = self.refinement(out_dec_level1)
|
276 |
+
|
277 |
+
out_dec_level1 = out_dec_level1 + self.skip_conv(inp_enc_level1)
|
278 |
+
out_dec_level1 = self.output(out_dec_level1)
|
279 |
+
|
280 |
+
return out_dec_level1
|
281 |
+
|
282 |
+
|
283 |
+
|
284 |
+
if __name__ == '__main__':
|
285 |
+
from torchtoolbox.tools import summary
|
286 |
+
model = Restormer(
|
287 |
+
inp_channels=6,
|
288 |
+
out_channels=3,
|
289 |
+
dim = 48,
|
290 |
+
# num_blocks = [4,6,6,8],
|
291 |
+
num_blocks = [2,3,3,4],
|
292 |
+
num_refinement_blocks = 4,
|
293 |
+
heads = [1,2,4,8],
|
294 |
+
ffn_expansion_factor = 2.66,
|
295 |
+
bias = False,
|
296 |
+
LayerNorm_type = 'WithBias', ## Other option 'BiasFree'
|
297 |
+
dual_pixel_task = True ## True for dual-pixel defocus deblurring only. Also set inp_channels=6
|
298 |
+
)
|
299 |
+
# model = Restormer(num_blocks=[4, 6, 6, 8], num_heads=[1, 2, 4, 8], channels=[48, 96, 192, 384], num_refinement=4, expansion_factor=2.66)
|
300 |
+
print(summary(model,torch.rand((1, 6, 256, 256))))
|
301 |
+
|
302 |
+
from thop import profile
|
303 |
+
input = torch.rand((1, 6, 256, 256))
|
304 |
+
gflops,params = profile(model,inputs=(input,))
|
305 |
+
gflops = gflops*2 / 10**9
|
306 |
+
params = params / 10**6
|
307 |
+
print(gflops,'==============')
|
308 |
+
print(params,'==============')
|
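A quick smoke-test sketch for the architecture above, using the same hyper-parameters that train.py passes (6 input channels: RGB image plus a 3-channel DTSPrompt). The random input is only a placeholder.

```python
# Minimal forward-pass check; assumes this file is importable as models.restormer_arch.
import torch
from models.restormer_arch import Restormer

model = Restormer(inp_channels=6, out_channels=3, dim=48,
                  num_blocks=[2, 3, 3, 4], num_refinement_blocks=4,
                  heads=[1, 2, 4, 8], ffn_expansion_factor=2.66,
                  bias=False, LayerNorm_type='WithBias', dual_pixel_task=True)

x = torch.rand(1, 6, 256, 256)   # H and W must be divisible by 8 (three downsampling stages)
with torch.no_grad():
    y = model(x)
print(y.shape)                   # torch.Size([1, 3, 256, 256])
```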
requirements.txt
ADDED
@@ -0,0 +1,10 @@
--extra-index-url https://download.pytorch.org/whl/cu113
numpy==1.21.6
opencv-python-headless>=4.2.0
scikit-image>=0.19.3
torch==1.11.0+cu113
torchvision==0.12.0+cu113
einops
tqdm
gradio
Pillow
|
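Note that torch and torchvision are pinned to CUDA 11.3 wheels via the extra index on the first line; on a CPU-only machine or one with a different CUDA version these two pins will likely need to be swapped for matching builds before `pip install -r requirements.txt` succeeds.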
start_train.sh
ADDED
@@ -0,0 +1 @@
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node 8 --master_port 26413 train.py
|
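This launcher assumes eight visible GPUs; to train on fewer, reduce both `CUDA_VISIBLE_DEVICES` and `--nproc_per_node` to the same count. The `--master_port` value is arbitrary as long as the port is free.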
train.py
ADDED
@@ -0,0 +1,221 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import time
|
4 |
+
import random
|
5 |
+
import datetime
|
6 |
+
import argparse
|
7 |
+
import numpy as np
|
8 |
+
from tqdm import tqdm
|
9 |
+
from piq import ssim,psnr
|
10 |
+
from itertools import cycle
|
11 |
+
|
12 |
+
import torch
|
13 |
+
import torch.nn as nn
|
14 |
+
from torch.utils import data
|
15 |
+
import torch.distributed as dist
|
16 |
+
from torch.utils.data.distributed import DistributedSampler
|
17 |
+
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
|
18 |
+
|
19 |
+
|
20 |
+
from utils import dict2string,mkdir,get_lr,torch2cvimg,second2hours
|
21 |
+
from loaders import docres_loader
|
22 |
+
from models import restormer_arch
|
23 |
+
|
24 |
+
|
25 |
+
def seed_torch(seed=1029):
|
26 |
+
random.seed(seed)
|
27 |
+
os.environ['PYTHONHASHSEED'] = str(seed)
|
28 |
+
np.random.seed(seed)
|
29 |
+
torch.manual_seed(seed)
|
30 |
+
torch.cuda.manual_seed(seed)
|
31 |
+
torch.cuda.manual_seed_all(seed)
|
32 |
+
torch.backends.cudnn.benchmark = False
|
33 |
+
torch.backends.cudnn.deterministic = True
|
34 |
+
#torch.use_deterministic_algorithms(True)
|
35 |
+
# seed_torch()
|
36 |
+
|
37 |
+
|
38 |
+
def getBasecoord(h,w):
|
39 |
+
base_coord0 = np.tile(np.arange(h).reshape(h,1),(1,w)).astype(np.float32)
|
40 |
+
base_coord1 = np.tile(np.arange(w).reshape(1,w),(h,1)).astype(np.float32)
|
41 |
+
base_coord = np.concatenate((np.expand_dims(base_coord1,-1),np.expand_dims(base_coord0,-1)),-1)
|
42 |
+
return base_coord
|
43 |
+
|
44 |
+
def train(args):
|
45 |
+
|
46 |
+
## DDP init
|
47 |
+
dist.init_process_group(backend='nccl',init_method='env://',timeout=datetime.timedelta(seconds=36000))
|
48 |
+
torch.cuda.set_device(args.local_rank)
|
49 |
+
device = torch.device('cuda',args.local_rank)
|
50 |
+
torch.cuda.manual_seed_all(42)
|
51 |
+
|
52 |
+
### Log file:
|
53 |
+
mkdir(args.logdir)
|
54 |
+
mkdir(os.path.join(args.logdir,args.experiment_name))
|
55 |
+
log_file_path=os.path.join(args.logdir,args.experiment_name,'log.txt')
|
56 |
+
log_file=open(log_file_path,'a')
|
57 |
+
log_file.write('\n--------------- '+args.experiment_name+' ---------------\n')
|
58 |
+
log_file.close()
|
59 |
+
|
60 |
+
### Setup tensorboard for visualization
|
61 |
+
if args.tboard:
|
62 |
+
writer = SummaryWriter(os.path.join(args.logdir,args.experiment_name,'runs'),args.experiment_name)
|
63 |
+
|
64 |
+
### Setup Dataloader
|
65 |
+
datasets_setting = [
|
66 |
+
{'task':'deblurring','ratio':1,'im_path':'/home/jiaxin/Training_Data/DocRes_data/train/deblurring/','json_paths':['/home/jiaxin/Training_Data/DocRes_data/train/deblurring/tdd/train.json']},
|
67 |
+
{'task':'dewarping','ratio':1,'im_path':'/home/jiaxin/Training_Data/DocRes_data/train/dewarping/','json_paths':['/home/jiaxin/Training_Data/DocRes_data/train/dewarping/doc3d/train_1_19.json']},
|
68 |
+
{'task':'binarization','ratio':1,'im_path':'/home/jiaxin/Training_Data/DocRes_data/train/binarization/','json_paths':['/home/jiaxin/Training_Data/DocRes_data/train/binarization/train.json']},
|
69 |
+
{'task':'deshadowing','ratio':1,'im_path':'/home/jiaxin/Training_Data/DocRes_data/train/deshadowing/','json_paths':['/home/jiaxin/Training_Data/DocRes_data/train/deshadowing/train.json']},
|
70 |
+
{'task':'appearance','ratio':1,'im_path':'/home/jiaxin/Training_Data/DocRes_data/train/appearance/','json_paths':['/home/jiaxin/Training_Data/DocRes_data/train/appearance/trainv2.json']}
|
71 |
+
]
|
72 |
+
|
73 |
+
|
74 |
+
ratios = [dataset_setting['ratio'] for dataset_setting in datasets_setting]
|
75 |
+
datasets = [docres_loader.DocResTrainDataset(dataset=dataset_setting,img_size=args.im_size) for dataset_setting in datasets_setting]
|
76 |
+
trainloaders = [{'task':datasets_setting[i],'loader':data.DataLoader(dataset=datasets[i], sampler=DistributedSampler(datasets[i]), batch_size=args.batch_size, num_workers=2, pin_memory=True,drop_last=True),'iter_loader':iter(data.DataLoader(dataset=datasets[i], sampler=DistributedSampler(datasets[i]), batch_size=args.batch_size, num_workers=2, pin_memory=True,drop_last=True))} for i in range(len(datasets))]
|
77 |
+
|
78 |
+
|
79 |
+
### test loader
|
80 |
+
# for i in tqdm(range(args.total_iter)):
|
81 |
+
# loader_index = random.choices(list(range(len(trainloaders))),ratios)[0]
|
82 |
+
# in_im,gt_im = next(trainloaders[loader_index]['iter_loader'])
|
83 |
+
|
84 |
+
|
85 |
+
### Setup Model
|
86 |
+
model = restormer_arch.Restormer(
|
87 |
+
inp_channels=6,
|
88 |
+
out_channels=3,
|
89 |
+
dim = 48,
|
90 |
+
num_blocks = [2,3,3,4],
|
91 |
+
num_refinement_blocks = 4,
|
92 |
+
heads = [1,2,4,8],
|
93 |
+
ffn_expansion_factor = 2.66,
|
94 |
+
bias = False,
|
95 |
+
LayerNorm_type = 'WithBias',
|
96 |
+
dual_pixel_task = True
|
97 |
+
)
|
98 |
+
model=DDP(model.cuda(),device_ids=[args.local_rank],output_device=args.local_rank)
|
99 |
+
|
100 |
+
### Optimizer
|
101 |
+
optimizer= torch.optim.AdamW(model.parameters(),lr=args.l_rate,weight_decay=5e-4)
|
102 |
+
|
103 |
+
### LR Scheduler
|
104 |
+
sched = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.total_iter, eta_min=1e-6, last_epoch=-1)
|
105 |
+
|
106 |
+
### load checkpoint
|
107 |
+
iter_start=0
|
108 |
+
if args.resume is not None:
|
109 |
+
print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
|
110 |
+
checkpoint = torch.load(args.resume, map_location='cpu')
x = checkpoint['model_state']
|
111 |
+
model.load_state_dict(x,strict=False)
|
112 |
+
iter_start=checkpoint['iters']
|
113 |
+
print("Loaded checkpoint '{}' (iter {})".format(args.resume, iter_start))
|
114 |
+
|
115 |
+
###-----------------------------------------Training-----------------------------------------
|
116 |
+
##initialize
|
117 |
+
scaler = torch.cuda.amp.GradScaler()
|
118 |
+
loss_dict = {}
|
119 |
+
total_step = 0
|
120 |
+
l2 = nn.MSELoss()
|
121 |
+
l1 = nn.L1Loss()
|
122 |
+
ce = nn.CrossEntropyLoss()
|
123 |
+
bce = nn.BCEWithLogitsLoss()
|
124 |
+
m = nn.Sigmoid()
|
125 |
+
best = 0
|
126 |
+
best_ce = 999
|
127 |
+
|
128 |
+
## total_steps
|
129 |
+
for iters in range(iter_start,args.total_iter):
|
130 |
+
start_time = time.time()
|
131 |
+
loader_index = random.choices(list(range(len(trainloaders))),ratios)[0]
|
132 |
+
|
133 |
+
try:
|
134 |
+
in_im,gt_im = next(trainloaders[loader_index]['iter_loader'])
|
135 |
+
except StopIteration:
|
136 |
+
trainloaders[loader_index]['iter_loader']=iter(trainloaders[loader_index]['loader'])
|
137 |
+
in_im,gt_im = next(trainloaders[loader_index]['iter_loader'])
|
138 |
+
in_im = in_im.float().cuda()
|
139 |
+
gt_im = gt_im.float().cuda()
|
140 |
+
|
141 |
+
binarization_loss,appearance_loss,dewarping_loss,deblurring_loss,deshadowing_loss = 0,0,0,0,0
|
142 |
+
with torch.cuda.amp.autocast():
|
143 |
+
pred_im = model(in_im,trainloaders[loader_index]['task']['task'])
|
144 |
+
if trainloaders[loader_index]['task']['task'] == 'binarization':
|
145 |
+
gt_im = gt_im.long()
|
146 |
+
binarization_loss = ce(pred_im[:,:2,:,:], gt_im[:,0,:,:])
|
147 |
+
loss = binarization_loss
|
148 |
+
elif trainloaders[loader_index]['task']['task'] == 'dewarping':
|
149 |
+
dewarping_loss = l1(pred_im[:,:2,:,:], gt_im[:,:2,:,:])
|
150 |
+
loss = dewarping_loss
|
151 |
+
elif trainloaders[loader_index]['task']['task'] == 'appearance':
|
152 |
+
appearance_loss = l1(pred_im, gt_im)
|
153 |
+
loss = appearance_loss
|
154 |
+
elif trainloaders[loader_index]['task']['task'] == 'deblurring':
|
155 |
+
deblurring_loss = l1(pred_im, gt_im)
|
156 |
+
loss = deblurring_loss
|
157 |
+
elif trainloaders[loader_index]['task']['task'] == 'deshadowing':
|
158 |
+
deshadowing_loss = l1(pred_im, gt_im)
|
159 |
+
loss = deshadowing_loss
|
160 |
+
|
161 |
+
optimizer.zero_grad()
|
162 |
+
scaler.scale(loss).backward()
|
163 |
+
scaler.step(optimizer)
|
164 |
+
scaler.update()
|
165 |
+
|
166 |
+
loss_dict['dew_loss']=dewarping_loss.item() if isinstance(dewarping_loss,torch.Tensor) else 0
|
167 |
+
loss_dict['app_loss']=appearance_loss.item() if isinstance(appearance_loss,torch.Tensor) else 0
|
168 |
+
loss_dict['des_loss']=deshadowing_loss.item() if isinstance(deshadowing_loss,torch.Tensor) else 0
|
169 |
+
loss_dict['deb_loss']=deblurring_loss.item() if isinstance(deblurring_loss,torch.Tensor) else 0
|
170 |
+
loss_dict['bin_loss']=binarization_loss.item() if isinstance(binarization_loss,torch.Tensor) else 0
|
171 |
+
end_time = time.time()
|
172 |
+
duration = end_time-start_time
|
173 |
+
## log
|
174 |
+
if (iters+1) % 10 == 0:
|
175 |
+
## print
|
176 |
+
print('iters [{}/{}] -- '.format(iters+1,args.total_iter)+dict2string(loss_dict)+' --lr {:6f}'.format(get_lr(optimizer))+' -- time {}'.format(second2hours(duration*(args.total_iter-iters))))
|
177 |
+
## tbord
|
178 |
+
if args.tboard:
|
179 |
+
for key,value in loss_dict.items():
|
180 |
+
writer.add_scalar('Train '+key+'/Iterations', value, iters+1)
|
181 |
+
## logfile
|
182 |
+
with open(log_file_path,'a') as f:
|
183 |
+
f.write('iters [{}/{}] -- '.format(iters+1,args.total_iter)+dict2string(loss_dict)+' --lr {:6f}'.format(get_lr(optimizer))+' -- time {}'.format(second2hours(duration*(args.total_iter-iters)))+'\n')
|
184 |
+
|
185 |
+
|
186 |
+
if (iters+1) % 5000 == 0:
|
187 |
+
state = {'iters': iters+1,
|
188 |
+
'model_state': model.state_dict(),
|
189 |
+
'optimizer_state' : optimizer.state_dict(),}
|
190 |
+
if not os.path.exists(os.path.join(args.logdir,args.experiment_name)):
|
191 |
+
os.system('mkdir ' + os.path.join(args.logdir,args.experiment_name))
|
192 |
+
if torch.distributed.get_rank()==0:
|
193 |
+
torch.save(state, os.path.join(args.logdir,args.experiment_name,"{}.pkl".format(iters+1)))
|
194 |
+
|
195 |
+
sched.step()
|
196 |
+
|
197 |
+
|
198 |
+
|
199 |
+
if __name__ == '__main__':
|
200 |
+
parser = argparse.ArgumentParser(description='Hyperparams')
|
201 |
+
parser.add_argument('--im_size', nargs='?', type=int, default=256,
|
202 |
+
help='Height of the input image')
|
203 |
+
parser.add_argument('--total_iter', nargs='?', type=int, default=100000,
|
204 |
+
help='Total number of training iterations')
|
205 |
+
parser.add_argument('--batch_size', nargs='?', type=int, default=10,
|
206 |
+
help='Batch Size')
|
207 |
+
parser.add_argument('--l_rate', nargs='?', type=float, default=2e-4,
|
208 |
+
help='Learning Rate')
|
209 |
+
parser.add_argument('--resume', nargs='?', type=str, default=None,
|
210 |
+
help='Path to previous saved model to restart from')
|
211 |
+
parser.add_argument('--logdir', nargs='?', type=str, default='./checkpoints/',
|
212 |
+
help='Path to store the loss logs')
|
213 |
+
parser.add_argument('--tboard', dest='tboard', action='store_true',
|
214 |
+
help='Enable visualization(s) on tensorboard | False by default')
|
215 |
+
parser.add_argument('--local_rank',type=int,default=0,metavar='N')
|
216 |
+
parser.add_argument('--experiment_name', nargs='?', type=str,default='experiment_name',
|
217 |
+
help='the name of this experiment')
|
218 |
+
parser.set_defaults(tboard=False)
|
219 |
+
args = parser.parse_args()
|
220 |
+
|
221 |
+
train(args)
|
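Checkpoints are written every 5000 iterations as a dict with keys `iters`, `model_state` and `optimizer_state`. A hedged sketch of reloading one outside the DDP training loop follows; the checkpoint path is a placeholder, and `convert_state_dict` from utils.py strips the `module.` prefixes that DDP adds to parameter names.

```python
# Sketch: restore a saved checkpoint into a non-DDP model instance.
import torch
from utils import convert_state_dict
from models.restormer_arch import Restormer

model = Restormer(inp_channels=6, out_channels=3, dim=48, num_blocks=[2, 3, 3, 4],
                  num_refinement_blocks=4, heads=[1, 2, 4, 8], ffn_expansion_factor=2.66,
                  bias=False, LayerNorm_type='WithBias', dual_pixel_task=True)

checkpoint = torch.load('./checkpoints/experiment_name/5000.pkl', map_location='cpu')  # placeholder path
print('checkpoint written at iteration', checkpoint['iters'])
model.load_state_dict(convert_state_dict(checkpoint['model_state']))  # strip DDP 'module.' prefixes
```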
utils.py
ADDED
@@ -0,0 +1,464 @@
1 |
+
from collections import OrderedDict
|
2 |
+
import os
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import torch.nn.functional as F
|
6 |
+
import os
|
7 |
+
from skimage.filters import threshold_sauvola
|
8 |
+
import cv2
|
9 |
+
|
10 |
+
def second2hours(seconds):
|
11 |
+
h = seconds//3600
|
12 |
+
seconds %= 3600
|
13 |
+
m = seconds//60
|
14 |
+
seconds %= 60
|
15 |
+
|
16 |
+
hms = '{:d} H : {:d} Min'.format(int(h),int(m))
|
17 |
+
return hms
|
18 |
+
|
19 |
+
|
20 |
+
def dict2string(loss_dict):
|
21 |
+
loss_string = ''
|
22 |
+
for key, value in loss_dict.items():
|
23 |
+
loss_string += key+' {:.4f}, '.format(value)
|
24 |
+
return loss_string[:-2]
|
25 |
+
def mkdir(dir):
|
26 |
+
if not os.path.exists(dir):
|
27 |
+
os.makedirs(dir)
|
28 |
+
|
29 |
+
def convert_state_dict(state_dict):
|
30 |
+
"""Converts a state dict saved from a dataParallel module to normal
|
31 |
+
module state_dict inplace
|
32 |
+
:param state_dict is the loaded DataParallel model_state
|
33 |
+
|
34 |
+
"""
|
35 |
+
new_state_dict = OrderedDict()
|
36 |
+
for k, v in state_dict.items():
|
37 |
+
name = k[7:] # remove `module.`
|
38 |
+
new_state_dict[name] = v
|
39 |
+
return new_state_dict
|
40 |
+
|
41 |
+
|
42 |
+
def get_lr(optimizer):
|
43 |
+
for param_group in optimizer.param_groups:
|
44 |
+
return float(param_group['lr'])
|
45 |
+
|
46 |
+
|
47 |
+
def torch2cvimg(tensor,min=0,max=1):
|
48 |
+
'''
|
49 |
+
input:
|
50 |
+
tensor -> torch.tensor BxCxHxW C can be 1,3
|
51 |
+
return
|
52 |
+
im -> ndarray uint8 HxWxC
|
53 |
+
'''
|
54 |
+
im_list = []
|
55 |
+
for i in range(tensor.shape[0]):
|
56 |
+
im = tensor.detach().cpu().data.numpy()[i]
|
57 |
+
im = im.transpose(1,2,0)
|
58 |
+
im = np.clip(im,min,max)
|
59 |
+
im = ((im-min)/(max-min)*255).astype(np.uint8)
|
60 |
+
im_list.append(im)
|
61 |
+
return im_list
|
62 |
+
def cvimg2torch(img,min=0,max=1):
|
63 |
+
'''
|
64 |
+
input:
|
65 |
+
im -> ndarray uint8 HxWxC
|
66 |
+
return
|
67 |
+
tensor -> torch.tensor BxCxHxW
|
68 |
+
'''
|
69 |
+
img = img.astype(float) / 255.0
|
70 |
+
img = img.transpose(2, 0, 1) # HWC -> CHW
|
71 |
+
img = np.expand_dims(img, 0)
|
72 |
+
img = torch.from_numpy(img).float()
|
73 |
+
return img
|
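A small round-trip sketch for the two converters above; the file name is a placeholder.

```python
# Sketch: image -> normalized tensor -> image round trip with the helpers above.
import cv2

img = cv2.imread('doc.png')            # HxWx3 uint8
tensor = cvimg2torch(img)              # 1x3xHxW float tensor in [0,1]
restored = torch2cvimg(tensor)[0]      # list of HxWx3 uint8 images, one per batch item
cv2.imwrite('doc_roundtrip.png', restored)
```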
74 |
+
|
75 |
+
|
76 |
+
def setup_seed(seed):
|
77 |
+
# np.random.seed(seed)
|
78 |
+
# random.seed(seed)
|
79 |
+
# torch.manual_seed(seed) #cpu
|
80 |
+
# torch.cuda.manual_seed_all(seed) # for multi-GPU (parallel) runs
|
81 |
+
torch.backends.cudnn.deterministic = True # keep CPU/GPU results consistent
|
82 |
+
# torch.backends.cudnn.benchmark = False # speeds up training when input sizes barely change
|
83 |
+
|
84 |
+
def SauvolaModBinarization(image,n1=51,n2=51,k1=0.3,k2=0.3,default=True):
|
85 |
+
'''
|
86 |
+
Binarization using Sauvola's algorithm
|
87 |
+
@name : SauvolaModBinarization
|
88 |
+
parameters
|
89 |
+
@param image (numpy array, 3-channel or single-channel, of type np.uint8): color or grayscale image
|
90 |
+
optional parameters
|
91 |
+
@param n1 (int) : window size for running sauvola during the first pass
|
92 |
+
@param n2 (int): window size for running sauvola during the second pass
|
93 |
+
@param k1 (float): k value corresponding to sauvola during the first pass
|
94 |
+
@param k2 (float): k value corresponding to sauvola during the second pass
|
95 |
+
@param default (bool) : boolean flag that resets the above optional parameters to their default values.
|
96 |
+
@param default is set to True : thus default values of the above optional parameters (n1,n2,k1,k2) are set to
|
97 |
+
n1 = 5 % of min(image height, image width)
|
98 |
+
n2 = 10 % of min(image height, image width)
|
99 |
+
k1 = 0.5
|
100 |
+
k2 = 0.5
|
101 |
+
Returns
|
102 |
+
@return A binary image of same size as @param image
|
103 |
+
|
104 |
+
@cite https://drive.google.com/file/d/1D3CyI5vtodPJeZaD2UV5wdcaIMtkBbdZ/view?usp=sharing
|
105 |
+
'''
|
106 |
+
|
107 |
+
if(default):
|
108 |
+
n1 = int(0.05*min(image.shape[0],image.shape[1]))
|
109 |
+
if (n1%2==0):
|
110 |
+
n1 = n1+1
|
111 |
+
n2 = int(0.1*min(image.shape[0],image.shape[1]))
|
112 |
+
if (n2%2==0):
|
113 |
+
n2 = n2+1
|
114 |
+
k1 = 0.5
|
115 |
+
k2 = 0.5
|
116 |
+
if(image.ndim==3):
|
117 |
+
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
118 |
+
else:
|
119 |
+
gray = np.copy(image)
|
120 |
+
T1 = threshold_sauvola(gray, window_size=n1,k=k1)
|
121 |
+
max_val = np.amax(gray)
|
122 |
+
min_val = np.amin(gray)
|
123 |
+
C = np.copy(T1)
|
124 |
+
C = C.astype(np.float32)
|
125 |
+
C[gray > T1] = (gray[gray > T1] - T1[gray > T1])/(max_val - T1[gray > T1])
|
126 |
+
C[gray <= T1] = 0
|
127 |
+
C = C * 255.0
|
128 |
+
new_in = np.copy(C.astype(np.uint8))
|
129 |
+
T2 = threshold_sauvola(new_in, window_size=n2,k=k2)
|
130 |
+
binary = np.copy(gray)
|
131 |
+
binary[new_in <= T2] = 0
|
132 |
+
binary[new_in > T2] = 255
|
133 |
+
return binary,T2
|
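A short usage sketch for SauvolaModBinarization above; `doc.png` is a placeholder path and the default two-pass window/k parameters are used.

```python
# Sketch: binarize a document photo with the two-pass Sauvola routine above.
import cv2

image = cv2.imread('doc.png')                       # BGR or grayscale uint8 image
binary, threshold = SauvolaModBinarization(image)   # binary is 0/255, same size as input
cv2.imwrite('doc_bin.png', binary)
```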
134 |
+
|
135 |
+
|
136 |
+
def getBasecoord(h,w):
|
137 |
+
base_coord0 = np.tile(np.arange(h).reshape(h,1),(1,w)).astype(np.float32)
|
138 |
+
base_coord1 = np.tile(np.arange(w).reshape(1,w),(h,1)).astype(np.float32)
|
139 |
+
base_coord = np.concatenate((np.expand_dims(base_coord1,-1),np.expand_dims(base_coord0,-1)),-1)
|
140 |
+
return base_coord
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
import numpy as np
|
148 |
+
from scipy import ndimage as ndi
|
149 |
+
|
150 |
+
# lookup tables for bwmorph_thin
|
151 |
+
|
152 |
+
G123_LUT = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
|
153 |
+
0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
154 |
+
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
|
155 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
|
156 |
+
1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
|
157 |
+
0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
158 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
159 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
160 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
161 |
+
0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
|
162 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1,
|
163 |
+
0, 0, 0], dtype=np.bool_)
|
164 |
+
|
165 |
+
G123P_LUT = np.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
|
166 |
+
0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
167 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
|
168 |
+
1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
169 |
+
0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
170 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
|
171 |
+
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
|
172 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
173 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0,
|
174 |
+
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
|
175 |
+
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
176 |
+
0, 0, 0], dtype=np.bool_)
|
177 |
+
|
178 |
+
def bwmorph(image, n_iter=None):
|
179 |
+
"""
|
180 |
+
Perform morphological thinning of a binary image
|
181 |
+
|
182 |
+
Parameters
|
183 |
+
----------
|
184 |
+
image : binary (M, N) ndarray
|
185 |
+
The image to be thinned.
|
186 |
+
|
187 |
+
n_iter : int, number of iterations, optional
|
188 |
+
Regardless of the value of this parameter, the thinned image
|
189 |
+
is returned immediately if an iteration produces no change.
|
190 |
+
If this parameter is specified it thus sets an upper bound on
|
191 |
+
the number of iterations performed.
|
192 |
+
|
193 |
+
Returns
|
194 |
+
-------
|
195 |
+
out : ndarray of bools
|
196 |
+
Thinned image.
|
197 |
+
|
198 |
+
See also
|
199 |
+
--------
|
200 |
+
skeletonize
|
201 |
+
|
202 |
+
Notes
|
203 |
+
-----
|
204 |
+
This algorithm [1]_ works by making multiple passes over the image,
|
205 |
+
removing pixels matching a set of criteria designed to thin
|
206 |
+
connected regions while preserving eight-connected components and
|
207 |
+
2 x 2 squares [2]_. In each of the two sub-iterations the algorithm
|
208 |
+
correlates the intermediate skeleton image with a neighborhood mask,
|
209 |
+
then looks up each neighborhood in a lookup table indicating whether
|
210 |
+
the central pixel should be deleted in that sub-iteration.
|
211 |
+
|
212 |
+
References
|
213 |
+
----------
|
214 |
+
.. [1] Z. Guo and R. W. Hall, "Parallel thinning with
|
215 |
+
two-subiteration algorithms," Comm. ACM, vol. 32, no. 3,
|
216 |
+
pp. 359-373, 1989.
|
217 |
+
.. [2] Lam, L., Seong-Whan Lee, and Ching Y. Suen, "Thinning
|
218 |
+
Methodologies-A Comprehensive Survey," IEEE Transactions on
|
219 |
+
Pattern Analysis and Machine Intelligence, Vol 14, No. 9,
|
220 |
+
September 1992, p. 879
|
221 |
+
|
222 |
+
Examples
|
223 |
+
--------
|
224 |
+
>>> square = np.zeros((7, 7), dtype=np.uint8)
|
225 |
+
>>> square[1:-1, 2:-2] = 1
|
226 |
+
>>> square[0,1] = 1
|
227 |
+
>>> square
|
228 |
+
array([[0, 1, 0, 0, 0, 0, 0],
|
229 |
+
[0, 0, 1, 1, 1, 0, 0],
|
230 |
+
[0, 0, 1, 1, 1, 0, 0],
|
231 |
+
[0, 0, 1, 1, 1, 0, 0],
|
232 |
+
[0, 0, 1, 1, 1, 0, 0],
|
233 |
+
[0, 0, 1, 1, 1, 0, 0],
|
234 |
+
[0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
|
235 |
+
>>> skel = bwmorph_thin(square)
|
236 |
+
>>> skel.astype(np.uint8)
|
237 |
+
array([[0, 1, 0, 0, 0, 0, 0],
|
238 |
+
[0, 0, 1, 0, 0, 0, 0],
|
239 |
+
[0, 0, 0, 1, 0, 0, 0],
|
240 |
+
[0, 0, 0, 1, 0, 0, 0],
|
241 |
+
[0, 0, 0, 1, 0, 0, 0],
|
242 |
+
[0, 0, 0, 0, 0, 0, 0],
|
243 |
+
[0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
|
244 |
+
"""
|
245 |
+
# check parameters
|
246 |
+
if n_iter is None:
|
247 |
+
n = -1
|
248 |
+
elif n_iter <= 0:
|
249 |
+
raise ValueError('n_iter must be > 0')
|
250 |
+
else:
|
251 |
+
n = n_iter
|
252 |
+
|
253 |
+
# check that we have a 2d binary image, and convert it
|
254 |
+
# to uint8
|
255 |
+
skel = np.array(image).astype(np.uint8)
|
256 |
+
|
257 |
+
if skel.ndim != 2:
|
258 |
+
raise ValueError('2D array required')
|
259 |
+
if not np.all(np.in1d(image.flat,(0,1))):
|
260 |
+
raise ValueError('Image contains values other than 0 and 1')
|
261 |
+
|
262 |
+
# neighborhood mask
|
263 |
+
mask = np.array([[ 8, 4, 2],
|
264 |
+
[16, 0, 1],
|
265 |
+
[32, 64,128]],dtype=np.uint8)
|
266 |
+
|
267 |
+
# iterate either 1) indefinitely or 2) up to iteration limit
|
268 |
+
while n != 0:
|
269 |
+
before = np.sum(skel) # count points before thinning
|
270 |
+
|
271 |
+
# for each subiteration
|
272 |
+
for lut in [G123_LUT, G123P_LUT]:
|
273 |
+
# correlate image with neighborhood mask
|
274 |
+
N = ndi.correlate(skel, mask, mode='constant')
|
275 |
+
# take deletion decision from this subiteration's LUT
|
276 |
+
D = np.take(lut, N)
|
277 |
+
# perform deletion
|
278 |
+
skel[D] = 0
|
279 |
+
|
280 |
+
after = np.sum(skel) # count points after thinning
|
281 |
+
|
282 |
+
if before == after:
|
283 |
+
# iteration had no effect: finish
|
284 |
+
break
|
285 |
+
|
286 |
+
# count down to iteration limit (or endlessly negative)
|
287 |
+
n -= 1
|
288 |
+
|
289 |
+
return skel.astype(np.bool_)
|
290 |
+
|
291 |
+
"""
|
292 |
+
# here's how to make the LUTs
|
293 |
+
def nabe(n):
|
294 |
+
return np.array([n>>i&1 for i in range(0,9)]).astype(np.bool_)
|
295 |
+
def hood(n):
|
296 |
+
return np.take(nabe(n), np.array([[3, 2, 1],
|
297 |
+
[4, 8, 0],
|
298 |
+
[5, 6, 7]]))
|
299 |
+
def G1(n):
|
300 |
+
s = 0
|
301 |
+
bits = nabe(n)
|
302 |
+
for i in (0,2,4,6):
|
303 |
+
if not(bits[i]) and (bits[i+1] or bits[(i+2) % 8]):
|
304 |
+
s += 1
|
305 |
+
return s==1
|
306 |
+
|
307 |
+
g1_lut = np.array([G1(n) for n in range(256)])
|
308 |
+
def G2(n):
|
309 |
+
n1, n2 = 0, 0
|
310 |
+
bits = nabe(n)
|
311 |
+
for k in (1,3,5,7):
|
312 |
+
if bits[k] or bits[k-1]:
|
313 |
+
n1 += 1
|
314 |
+
if bits[k] or bits[(k+1) % 8]:
|
315 |
+
n2 += 1
|
316 |
+
return min(n1,n2) in [2,3]
|
317 |
+
g2_lut = np.array([G2(n) for n in range(256)])
|
318 |
+
g12_lut = g1_lut & g2_lut
|
319 |
+
def G3(n):
|
320 |
+
bits = nabe(n)
|
321 |
+
return not((bits[1] or bits[2] or not(bits[7])) and bits[0])
|
322 |
+
def G3p(n):
|
323 |
+
bits = nabe(n)
|
324 |
+
return not((bits[5] or bits[6] or not(bits[3])) and bits[4])
|
325 |
+
g3_lut = np.array([G3(n) for n in range(256)])
|
326 |
+
g3p_lut = np.array([G3p(n) for n in range(256)])
|
327 |
+
g123_lut = g12_lut & g3_lut
|
328 |
+
g123p_lut = g12_lut & g3p_lut
|
329 |
+
"""
|
330 |
+
|
331 |
+
"""
|
332 |
+
author : Peb Ruswono Aryan
|
333 |
+
|
334 |
+
metric for evaluating binarization algorithms
|
335 |
+
implemented :
|
336 |
+
|
337 |
+
* F-Measure
|
338 |
+
* pseudo F-Measure (as in H-DIBCO 2010 & 2012)
|
339 |
+
* Peak Signal to Noise Ratio (PSNR)
|
340 |
+
* Negative Rate Measure (NRM)
|
341 |
+
* Misclassification Penalty Measure (MPM)
|
342 |
+
* Distance Reciprocal Distortion (DRD)
|
343 |
+
|
344 |
+
usage:
|
345 |
+
python metric.py test-image.png ground-truth-image.png
|
346 |
+
"""
|
347 |
+
|
348 |
+
|
349 |
+
def drd_fn(im, im_gt):
|
350 |
+
height, width = im.shape
|
351 |
+
neg = np.zeros(im.shape)
|
352 |
+
neg[im_gt!=im] = 1
|
353 |
+
y, x = np.unravel_index(np.flatnonzero(neg), im.shape)
|
354 |
+
|
355 |
+
n = 2
|
356 |
+
m = n*2+1
|
357 |
+
W = np.zeros((m,m), dtype=np.uint8)
|
358 |
+
W[n,n] = 1.
|
359 |
+
W = cv2.distanceTransform(1-W, cv2.DIST_L2, cv2.DIST_MASK_PRECISE)
|
360 |
+
W[n,n] = 1.
|
361 |
+
W = 1./W
|
362 |
+
W[n,n] = 0.
|
363 |
+
W /= W.sum()
|
364 |
+
|
365 |
+
nubn = 0.
|
366 |
+
block_size = 8
|
367 |
+
for y1 in range(0, height, block_size):
|
368 |
+
for x1 in range(0, width, block_size):
|
369 |
+
y2 = min(y1+block_size-1,height-1)
|
370 |
+
x2 = min(x1+block_size-1,width-1)
|
371 |
+
block_dim = (x2-x1+1)*(y2-y1+1)
|
372 |
+
block = 1-im_gt[y1:y2, x1:x2]
|
373 |
+
block_sum = np.sum(block)
|
374 |
+
if block_sum>0 and block_sum<block_dim:
|
375 |
+
nubn += 1
|
376 |
+
|
377 |
+
drd_sum= 0.
|
378 |
+
tmp = np.zeros(W.shape)
|
379 |
+
for i in range(min(1,len(y))):
|
380 |
+
tmp[:,:] = 0
|
381 |
+
|
382 |
+
x1 = max(0, x[i]-n)
|
383 |
+
y1 = max(0, y[i]-n)
|
384 |
+
x2 = min(width-1, x[i]+n)
|
385 |
+
y2 = min(height-1, y[i]+n)
|
386 |
+
|
387 |
+
yy1 = y1-y[i]+n
|
388 |
+
yy2 = y2-y[i]+n
|
389 |
+
xx1 = x1-x[i]+n
|
390 |
+
xx2 = x2-x[i]+n
|
391 |
+
|
392 |
+
tmp[yy1:yy2+1,xx1:xx2+1] = np.abs(im[y[i],x[i]]-im_gt[y1:y2+1,x1:x2+1])
|
393 |
+
tmp *= W
|
394 |
+
|
395 |
+
drd_sum += np.sum(tmp)
|
396 |
+
return drd_sum/nubn
|
397 |
+
|
398 |
+
def bin_metric(im,im_gt):
|
399 |
+
height, width = im.shape
|
400 |
+
npixel = height*width
|
401 |
+
|
402 |
+
im[im>0] = 1
|
403 |
+
gt_mask = im_gt==0
|
404 |
+
im_gt[im_gt>0] = 1
|
405 |
+
|
406 |
+
sk = bwmorph(1-im_gt)
|
407 |
+
im_sk = np.ones(im_gt.shape)
|
408 |
+
im_sk[sk] = 0
|
409 |
+
|
410 |
+
kernel = np.ones((3,3), dtype=np.uint8)
|
411 |
+
im_dil = cv2.erode(im_gt, kernel)
|
412 |
+
im_gtb = im_gt-im_dil
|
413 |
+
im_gtbd = cv2.distanceTransform(1-im_gtb, cv2.DIST_L2, 3)
|
414 |
+
|
415 |
+
nd = im_gtbd.sum()
|
416 |
+
|
417 |
+
ptp = np.zeros(im_gt.shape)
|
418 |
+
ptp[(im==0) & (im_sk==0)] = 1
|
419 |
+
numptp = ptp.sum()
|
420 |
+
|
421 |
+
tp = np.zeros(im_gt.shape)
|
422 |
+
tp[(im==0) & (im_gt==0)] = 1
|
423 |
+
numtp = tp.sum()
|
424 |
+
|
425 |
+
tn = np.zeros(im_gt.shape)
|
426 |
+
tn[(im==1) & (im_gt==1)] = 1
|
427 |
+
numtn = tn.sum()
|
428 |
+
|
429 |
+
fp = np.zeros(im_gt.shape)
|
430 |
+
fp[(im==0) & (im_gt==1)] = 1
|
431 |
+
numfp = fp.sum()
|
432 |
+
|
433 |
+
fn = np.zeros(im_gt.shape)
|
434 |
+
fn[(im==1) & (im_gt==0)] = 1
|
435 |
+
numfn = fn.sum()
|
436 |
+
|
437 |
+
precision = numtp / (numtp + numfp)
|
438 |
+
recall = numtp / (numtp + numfn)
|
439 |
+
precall = numptp / np.sum(1-im_sk)
|
440 |
+
fmeasure = (2*recall*precision)/(recall+precision)
|
441 |
+
pfmeasure = (2*precall*precision)/(precall+precision)
|
442 |
+
|
443 |
+
mse = (numfp+numfn)/npixel
|
444 |
+
psnr = 10.*np.log10(1./mse)
|
445 |
+
|
446 |
+
nrfn = numfn / (numfn + numtp)
|
447 |
+
nrfp = numfp / (numfp + numtn)
|
448 |
+
nrm = (nrfn + nrfp)/2
|
449 |
+
|
450 |
+
im_dn = im_gtbd.copy()
|
451 |
+
im_dn[fn==0] = 0
|
452 |
+
dn = np.sum(im_dn)
|
453 |
+
mpfn = dn / nd
|
454 |
+
|
455 |
+
im_dp = im_gtbd.copy()
|
456 |
+
im_dp[fp==0] = 0
|
457 |
+
dp = np.sum(im_dp)
|
458 |
+
mpfp = dp / nd
|
459 |
+
|
460 |
+
mpm = (mpfp + mpfn) / 2
|
461 |
+
drd = drd_fn(im, im_gt)
|
462 |
+
|
463 |
+
return fmeasure, pfmeasure,psnr,nrm, mpm,drd
|
464 |
+
# print("F-measure\t: {0}\npF-measure\t: {1}\nPSNR\t\t: {2}\nNRM\t\t: {3}\nMPM\t\t: {4}\nDRD\t\t: {5}".format(fmeasure, pfmeasure, psnr, nrm, mpm, drd))
|
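A hedged usage sketch for bin_metric: both maps are read as single-channel images where 0 is ink and 255 is background, matching the conventions of the DIBCO-style metrics above; the file names are placeholders.

```python
# Sketch: score a predicted binarization against its ground truth.
import cv2

pred = cv2.imread('pred_bin.png', cv2.IMREAD_GRAYSCALE)
gt = cv2.imread('gt_bin.png', cv2.IMREAD_GRAYSCALE)
fmeasure, pfmeasure, psnr, nrm, mpm, drd = bin_metric(pred.copy(), gt.copy())
print('F {:.4f}  pF {:.4f}  PSNR {:.2f}  DRD {:.4f}'.format(fmeasure, pfmeasure, psnr, drd))
```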