Spaces:

ankankbhunia
/

HWT

Running

App Files Files Community

ankankbhunia committed on Feb 20, 2024

Commit

13580fb

verified ·

1 Parent(s): 43b682f

INIT

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

app.py +81 -0
data/__pycache__/dataset.cpython-36.pyc +0 -0
data/__pycache__/dataset.cpython-37.pyc +0 -0
data/__pycache__/dataset.cpython-38.pyc +0 -0
data/__pycache__/dataset.cpython-39.pyc +0 -0
data/create_data.py +18 -0
data/dataset.py +247 -0
data/prepare_data.py +458 -0
files/.DS_Store +0 -0
files/english_words.txt +0 -0
files/example_data/style-1/im_0.png +0 -0
files/example_data/style-1/im_1.png +0 -0
files/example_data/style-1/im_10.png +0 -0
files/example_data/style-1/im_11.png +0 -0
files/example_data/style-1/im_12.png +0 -0
files/example_data/style-1/im_13.png +0 -0
files/example_data/style-1/im_14.png +0 -0
files/example_data/style-1/im_2.png +0 -0
files/example_data/style-1/im_3.png +0 -0
files/example_data/style-1/im_4.png +0 -0
files/example_data/style-1/im_5.png +0 -0
files/example_data/style-1/im_6.png +0 -0
files/example_data/style-1/im_7.png +0 -0
files/example_data/style-1/im_8.png +0 -0
files/example_data/style-1/im_9.png +0 -0
files/example_data/style-10/im_0.png +0 -0
files/example_data/style-10/im_1.png +0 -0
files/example_data/style-10/im_10.png +0 -0
files/example_data/style-10/im_11.png +0 -0
files/example_data/style-10/im_12.png +0 -0
files/example_data/style-10/im_13.png +0 -0
files/example_data/style-10/im_14.png +0 -0
files/example_data/style-10/im_2.png +0 -0
files/example_data/style-10/im_3.png +0 -0
files/example_data/style-10/im_4.png +0 -0
files/example_data/style-10/im_5.png +0 -0
files/example_data/style-10/im_6.png +0 -0
files/example_data/style-10/im_7.png +0 -0
files/example_data/style-10/im_8.png +0 -0
files/example_data/style-10/im_9.png +0 -0
files/example_data/style-102/im_0.png +0 -0
files/example_data/style-102/im_1.png +0 -0
files/example_data/style-102/im_10.png +0 -0
files/example_data/style-102/im_11.png +0 -0
files/example_data/style-102/im_12.png +0 -0
files/example_data/style-102/im_13.png +0 -0
files/example_data/style-102/im_14.png +0 -0
files/example_data/style-102/im_2.png +0 -0
files/example_data/style-102/im_3.png +0 -0
files/example_data/style-102/im_4.png +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import gradio as gr
+from PIL import Image
+import numpy as np
+from io import BytesIO
+import glob
+import os
+import time
+from data.dataset import load_itw_samples, crop_
+import torch
+import cv2
+import os
+import numpy as np
+from models.model import TRGAN
+from params import *
+from torch import nn
+from data.dataset import get_transform
+import pickle
+from PIL import Image
+import tqdm
+import shutil
+# Path of the generator checkpoint that is loaded below.
+model_path = 'files/iam_model.pth'
+# Passed to the TRGAN constructor and reused by generate_image when repeating the encoded text.
+batch_size = 1
+print ('(1) Loading model...')
+model = TRGAN(batch_size = batch_size)
+# Only the generator sub-module (netG) receives weights; map_location remaps the checkpoint tensors to the CPU.
+model.netG.load_state_dict(torch.load(model_path,  map_location=torch.device('cpu')) )
+print (model_path+' : Model loaded Successfully')
+# presumably TRGAN is an nn.Module, so this switches it to evaluation mode -- TODO confirm
+model.eval()
+# Define a function to generate an image based on text and images
+def generate_image(text,folder, _ch3, images):
+    # Your image generation logic goes here (replace with your actual implementation)
+    # For demonstration purposes, we'll just concatenate the uploaded images horizontally.
+    if images:
+      style_inputs, width_length = load_itw_samples(images)
+    elif folder:
+      style_inputs, width_length = load_itw_samples(folder)
+    else:
+      return None
+    # Load images
+    text = text.replace("\n", "").replace("\t", "")
+    text_encode =  [j.encode() for j in text.split(' ')]
+    eval_text_encode, eval_len_text = model.netconverter.encode(text_encode)
+    eval_text_encode = eval_text_encode.to('cuda').repeat(batch_size, 1, 1)
+    input_styles, page_val = model._generate_page(style_inputs.to(DEVICE).clone(), width_length, eval_text_encode, eval_len_text, no_concat = True)
+    page_val = crop_(page_val[0]*255)
+    input_styles = crop_(input_styles[0]*255)
+    max_width = max(page_val.shape[1],input_styles.shape[1])
+    if page_val.shape[1]!=max_width:
+      page_val = np.concatenate([page_val, np.ones((page_val.shape[0],max_width-page_val.shape[1]))*255], 1)
+    else:
+      input_styles = np.concatenate([input_styles, np.ones((input_styles.shape[0],max_width-input_styles.shape[1]))*255], 1)
+    upper_pad = np.ones((45,input_styles.shape[1]))*255
+    input_styles = np.concatenate([upper_pad, input_styles], 0)
+    page_val = np.concatenate([upper_pad, page_val], 0)
+    page_val = Image.fromarray(page_val).convert('RGB')
+    input_styles = Image.fromarray(input_styles).convert('RGB')
+    return input_styles, page_val
+# Define Gradio Interface
+# The four input components line up positionally with generate_image(text, folder, _ch3, images).
+iface = gr.Interface(
+    fn=generate_image,
+    inputs=[
+        # Text to render, pre-filled with a sample paragraph.
+        gr.Textbox(value = "In the quiet hum of everyday life, the dance of existence unfolds. Time, an ever-flowing river, carries the stories of triumph and heartache. Each fleeting moment is a brushstroke on the canvas of our memories. Within the tapestry of human connection, threads of empathy weave a fabric that binds us all. Nature's symphony plays, a harmonious blend of rustling leaves and birdsong. In the labyrinth of possibility, dreams take flight. Beneath the veneer of routine, lies the extraordinary. Embrace the kaleidoscope of experience, for in the ordinary, the extraordinary often reveals itself.",label = "Input text"),
+        # Choices are whatever entries exist under files/example_data when this module is executed.
+        gr.Dropdown(value = "files/example_data/style-30", choices=glob.glob('files/example_data/*'), label="Choose from provided writer styles"),
+        # Visual separator only; it still occupies the third positional argument (`_ch3`).
+        gr.Markdown("### OR"),
+        # Optional uploaded style images; generate_image checks these before the dropdown folder.
+        gr.File(label="Upload multiple word images", file_count="multiple")
+    ],
+    outputs=[#gr.Markdown("## Output"),
+             gr.Image(type="pil", label="Style Image"),
+             gr.Image(type="pil", label="Generated Image")]
+)
+# Launch the Gradio Interface
+# NOTE(review): share=True presumably requests a public share link and debug=True keeps the call in the foreground -- confirm both are wanted on a hosted Space.
+iface.launch(debug=True, share=True)

data/__pycache__/dataset.cpython-36.pyc ADDED Viewed

Binary file (6.37 kB). View file

data/__pycache__/dataset.cpython-37.pyc ADDED Viewed

Binary file (6.39 kB). View file

data/__pycache__/dataset.cpython-38.pyc ADDED Viewed

Binary file (7.77 kB). View file

data/__pycache__/dataset.cpython-39.pyc ADDED Viewed

Binary file (5.96 kB). View file

data/create_data.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import random
+import torch
+from torch.utils.data import Dataset
+from torch.utils.data import sampler
+import lmdb
+import torchvision.transforms as transforms
+import six
+import sys
+from PIL import Image
+import numpy as np
+import os
+import sys
+import pickle
+import numpy as np
+import glob
+glob.glob('/nfs/users/ext_ankan.bhunia/Handwritten_data/CVL/cvl-database-1-1/*/words/*/*tif')

data/dataset.py ADDED Viewed

	@@ -0,0 +1,247 @@

+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT
+import random
+import torch
+from torch.utils.data import Dataset
+from torch.utils.data import sampler
+#import lmdb
+import torchvision.transforms as transforms
+import six
+import sys
+from PIL import Image
+import numpy as np
+import os
+import sys
+import pickle
+import numpy as np
+from params import *
+import glob, cv2
+import torchvision.transforms as transforms
+def crop_(input):
+    image = Image.fromarray(input)
+    image = image.convert('L')
+    binary_image = image.point(lambda x: 0 if x > 127 else 255, '1')
+    bbox = binary_image.getbbox()
+    cropped_image = image.crop(bbox)
+    return np.array(cropped_image)
+def get_transform(grayscale=False, convert=True):
+    transform_list = []
+    if grayscale:
+        transform_list.append(transforms.Grayscale(1))
+    if convert:
+        transform_list += [transforms.ToTensor()]
+        if grayscale:
+            transform_list += [transforms.Normalize((0.5,), (0.5,))]
+        else:
+            transform_list += [transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+    return transforms.Compose(transform_list)
+def load_itw_samples(folder_path, num_samples = 15):
+  if isinstance(folder_path, str):
+    paths = glob.glob(f'{folder_path}/*')
+  else:
+    paths = folder_path
+  paths = np.random.choice(paths, num_samples, replace = len(paths)<=num_samples)
+  words = [os.path.basename(path_i)[:-4] for path_i in paths]
+  imgs = [np.array(Image.open(i).convert('L')) for i in paths]
+  imgs =  [crop_(im) for im in imgs]
+  imgs = [cv2.resize(imgs_i, (int(32*(imgs_i.shape[1]/imgs_i.shape[0])), 32)) for imgs_i in imgs]
+  max_width = 192
+  imgs_pad = []
+  imgs_wids = []
+  trans_fn = get_transform(grayscale=True)
+  for img in imgs:
+      img = 255 - img
+      img_height, img_width = img.shape[0], img.shape[1]
+      outImg = np.zeros(( img_height, max_width), dtype='float32')
+      outImg[:, :img_width] = img[:, :max_width]
+      img = 255 - outImg
+      imgs_pad.append(trans_fn((Image.fromarray(img))))
+      imgs_wids.append(img_width)
+  imgs_pad = torch.cat(imgs_pad, 0)
+  return imgs_pad.unsqueeze(0), torch.Tensor(imgs_wids).unsqueeze(0)
+class TextDataset():
+    def __init__(self, base_path = DATASET_PATHS,  num_examples = 15, target_transform=None):
+        self.NUM_EXAMPLES = num_examples
+        #base_path = DATASET_PATHS
+        file_to_store = open(base_path, "rb")
+        self.IMG_DATA = pickle.load(file_to_store)['train']
+        self.IMG_DATA  = dict(list( self.IMG_DATA.items())) #[:NUM_WRITERS])
+        if 'None' in self.IMG_DATA.keys():
+            del self.IMG_DATA['None']
+        self.author_id = list(self.IMG_DATA.keys())
+        self.transform = get_transform(grayscale=True)
+        self.target_transform = target_transform
+        self.collate_fn = TextCollator()
+    def __len__(self):
+        return len(self.author_id)
+    def __getitem__(self, index):
+        NUM_SAMPLES = self.NUM_EXAMPLES
+        author_id = self.author_id[index]
+        self.IMG_DATA_AUTHOR = self.IMG_DATA[author_id]
+        random_idxs = np.random.choice(len(self.IMG_DATA_AUTHOR), NUM_SAMPLES, replace = True)
+        rand_id_real = np.random.choice(len(self.IMG_DATA_AUTHOR))
+        real_img = self.transform(self.IMG_DATA_AUTHOR[rand_id_real]['img'].convert('L'))
+        real_labels = self.IMG_DATA_AUTHOR[rand_id_real]['label'].encode()
+        imgs = [np.array(self.IMG_DATA_AUTHOR[idx]['img'].convert('L')) for idx in random_idxs]
+        labels = [self.IMG_DATA_AUTHOR[idx]['label'].encode() for idx in random_idxs]
+        max_width = 192 #[img.shape[1] for img in imgs]
+        imgs_pad = []
+        imgs_wids = []
+        for img in imgs:
+            img = 255 - img
+            img_height, img_width = img.shape[0], img.shape[1]
+            outImg = np.zeros(( img_height, max_width), dtype='float32')
+            outImg[:, :img_width] = img[:, :max_width]
+            img = 255 - outImg
+            imgs_pad.append(self.transform((Image.fromarray(img))))
+            imgs_wids.append(img_width)
+        imgs_pad = torch.cat(imgs_pad, 0)
+        item = {'simg': imgs_pad, 'swids':imgs_wids, 'img' : real_img, 'label':real_labels,'img_path':'img_path', 'idx':'indexes', 'wcl':index}
+        return item
+class TextDatasetval():
+    def __init__(self, base_path = DATASET_PATHS, num_examples = 15, target_transform=None):
+        self.NUM_EXAMPLES = num_examples
+        #base_path = DATASET_PATHS
+        file_to_store = open(base_path, "rb")
+        self.IMG_DATA = pickle.load(file_to_store)['test']
+        self.IMG_DATA  = dict(list( self.IMG_DATA.items()))#[NUM_WRITERS:])
+        if 'None' in self.IMG_DATA.keys():
+            del self.IMG_DATA['None']
+        self.author_id = list(self.IMG_DATA.keys())
+        self.transform = get_transform(grayscale=True)
+        self.target_transform = target_transform
+        self.collate_fn = TextCollator()
+    def __len__(self):
+        return len(self.author_id)
+    def __getitem__(self, index):
+        NUM_SAMPLES = self.NUM_EXAMPLES
+        author_id = self.author_id[index]
+        self.IMG_DATA_AUTHOR = self.IMG_DATA[author_id]
+        random_idxs = np.random.choice(len(self.IMG_DATA_AUTHOR), NUM_SAMPLES, replace = True)
+        rand_id_real = np.random.choice(len(self.IMG_DATA_AUTHOR))
+        real_img = self.transform(self.IMG_DATA_AUTHOR[rand_id_real]['img'].convert('L'))
+        real_labels = self.IMG_DATA_AUTHOR[rand_id_real]['label'].encode()
+        imgs = [np.array(self.IMG_DATA_AUTHOR[idx]['img'].convert('L')) for idx in random_idxs]
+        labels = [self.IMG_DATA_AUTHOR[idx]['label'].encode() for idx in random_idxs]
+        max_width = 192 #[img.shape[1] for img in imgs]
+        imgs_pad = []
+        imgs_wids = []
+        for img in imgs:
+            img = 255 - img
+            img_height, img_width = img.shape[0], img.shape[1]
+            outImg = np.zeros(( img_height, max_width), dtype='float32')
+            outImg[:, :img_width] = img[:, :max_width]
+            img = 255 - outImg
+            imgs_pad.append(self.transform((Image.fromarray(img))))
+            imgs_wids.append(img_width)
+        imgs_pad = torch.cat(imgs_pad, 0)
+        item = {'simg': imgs_pad, 'swids':imgs_wids, 'img' : real_img, 'label':real_labels,'img_path':'img_path', 'idx':'indexes', 'wcl':index}
+        return item
+class TextCollator(object):
+    def __init__(self):
+        self.resolution = resolution
+    def __call__(self, batch):
+        img_path = [item['img_path'] for item in batch]
+        width = [item['img'].shape[2] for item in batch]
+        indexes = [item['idx'] for item in batch]
+        simgs =  torch.stack([item['simg'] for item in batch], 0)
+        wcls =  torch.Tensor([item['wcl'] for item in batch])
+        swids =  torch.Tensor([item['swids'] for item in batch])
+        imgs = torch.ones([len(batch), batch[0]['img'].shape[0], batch[0]['img'].shape[1], max(width)], dtype=torch.float32)
+        for idx, item in enumerate(batch):
+            try:
+                imgs[idx, :, :, 0:item['img'].shape[2]] = item['img']
+            except:
+                print(imgs.shape)
+        item = {'img': imgs, 'img_path':img_path, 'idx':indexes, 'simg': simgs, 'swids': swids, 'wcl':wcls}
+        if 'label' in batch[0].keys():
+            labels = [item['label'] for item in batch]
+            item['label'] = labels
+        if 'z' in batch[0].keys():
+            z = torch.stack([item['z'] for item in batch])
+            item['z'] = z
+        return item

data/prepare_data.py ADDED Viewed

	@@ -0,0 +1,458 @@

+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT
+import os
+import lmdb, tqdm
+import cv2
+import numpy as np
+import argparse
+import shutil
+import sys
+from PIL import Image
+import random
+import io
+import xmltodict
+import html
+from sklearn.decomposition import PCA
+import math
+from tqdm import tqdm
+from itertools import compress
+import glob
+def checkImageIsValid(imageBin):
+    if imageBin is None:
+        return False
+    imageBuf = np.fromstring(imageBin, dtype=np.uint8)
+    img = cv2.imdecode(imageBuf, cv2.IMREAD_GRAYSCALE)
+    imgH, imgW = img.shape[0], img.shape[1]
+    if imgH * imgW == 0:
+        return False
+    return True
+def writeCache(env, cache):
+    with env.begin(write=True) as txn:
+        for k, v in cache.items():
+            if type(k) == str:
+                k = k.encode()
+            if type(v) == str:
+                v = v.encode()
+            txn.put(k, v)
+def find_rot_angle(idx_letters):
+    idx_letters = np.array(idx_letters).transpose()
+    pca = PCA(n_components=2)
+    pca.fit(idx_letters)
+    comp = pca.components_
+    angle = math.atan(comp[0][0]/comp[0][1])
+    return math.degrees(angle)
+def read_data_from_folder(folder_path):
+    image_path_list = []
+    label_list = []
+    pics = os.listdir(folder_path)
+    pics.sort(key=lambda i: len(i))
+    for pic in pics:
+        image_path_list.append(folder_path + '/' + pic)
+        label_list.append(pic.split('_')[0])
+    return image_path_list, label_list
+def read_data_from_file(file_path):
+    image_path_list = []
+    label_list = []
+    f = open(file_path)
+    while True:
+        line1 = f.readline()
+        line2 = f.readline()
+        if not line1 or not line2:
+            break
+        line1 = line1.replace('\r', '').replace('\n', '')
+        line2 = line2.replace('\r', '').replace('\n', '')
+        image_path_list.append(line1)
+        label_list.append(line2)
+    return image_path_list, label_list
+def show_demo(demo_number, image_path_list, label_list):
+    print('\nShow some demo to prevent creating wrong lmdb data')
+    print('The first line is the path to image and the second line is the image label')
+    for i in range(demo_number):
+        print('image: %s\nlabel: %s\n' % (image_path_list[i], label_list[i]))
+def create_img_label_list(top_dir,dataset, mode, words, author_number, remove_punc):
+    """Collect image paths and labels for the CVL, IAM or RIMES dataset.
+
+    Args:
+        top_dir: Directory that contains one sub-directory per dataset.
+        dataset: 'CVL', 'IAM' or 'RIMES'; any other value yields empty lists.
+        mode: Requested split. NOTE(review): overwritten with 'all' below.
+        words: Truthy to use word images, falsy to use line images.
+        author_number: IAM only. When >= 0, restrict to the form at this index
+            of the collected annotation list; negative keeps all forms.
+        remove_punc: IAM only. Skip labels listed in `lables_to_skip`.
+
+    Returns:
+        `(image_path_list, label_list, output_dir, author_id)`. `author_id` is
+        the string 'None' unless a single IAM form was selected.
+    """
+    root_dir = os.path.join(top_dir, dataset)
+    # Only IAM gets a '/words' or '/lines' suffix; other datasets use root_dir as is.
+    output_dir = root_dir + (dataset=='IAM')*('/words'*words + '/lines'*(not words))
+    image_path_list, label_list = [], []
+    author_id = 'None'
+    # NOTE(review): this unconditionally replaces the caller's `mode`, so every non-'all' branch below is unreachable -- confirm it is intended.
+    mode = 'all'
+    if dataset=='CVL':
+        root_dir = os.path.join(root_dir, 'cvl-database-1-1')
+        if words:
+            images_name = 'words'
+        else:
+            images_name = 'lines'
+        if mode == 'tr' or mode == 'val':
+            mode_dir = ['trainset']
+        elif mode == 'te':
+            mode_dir = ['testset']
+        elif mode == 'all':
+            mode_dir = ['testset', 'trainset']
+        # Running file counter used to split 'tr' / 'val' at 10000.
+        idx = 1
+        for mod in mode_dir:
+            images_dir = os.path.join(root_dir, mod, images_name)
+            for path, subdirs, files in os.walk(images_dir):
+                for name in files:
+                    if (mode == 'tr' and idx >= 10000) or (
+                            mode == 'val' and idx < 10000) or mode == 'te' or mode == 'all' or mode == 'tr_3te':
+                        # Skip files whose second dash-separated field is '6' (this also skips the idx increment).
+                        if os.path.splitext(name)[0].split('-')[1] == '6':
+                            continue
+                        # The label is the last dash-separated field of the file name.
+                        label = os.path.splitext(name)[0].split('-')[-1]
+                        imagePath = os.path.join(path, name)
+                        label_list.append(label)
+                        image_path_list.append(imagePath)
+                    idx += 1
+    elif dataset=='IAM':
+        labels_name = 'original'
+        # From here on `mode` is a list of partition names.
+        if mode=='all':
+            mode = ['te', 'va1', 'va2', 'tr']
+        elif mode=='valtest':
+            mode=['te', 'va1', 'va2']
+        else:
+            mode = [mode]
+        if words:
+            images_name = 'wordImages'
+        else:
+            images_name = 'lineImages'
+        images_dir = os.path.join(root_dir, images_name)
+        labels_dir = os.path.join(root_dir, labels_name)
+        full_ann_files = []
+        im_dirs = []
+        line_ann_dirs = []
+        image_path_list, label_list = [], []
+        for mod in mode:
+            part_file = os.path.join(root_dir, 'original_partition', mod + '.lst')
+            with open(part_file)as fp:
+                for line in fp:
+                    name = line.split('-')
+                    # Only entries whose last field (minus its final character, presumably the newline) is 0 register a form.
+                    if int(name[-1][:-1]) == 0:
+                        anno_file = os.path.join(labels_dir, '-'.join(name[:2]) + '.xml')
+                        full_ann_files.append(anno_file)
+                        im_dir = os.path.join(images_dir, name[0], '-'.join(name[:2]))
+                        im_dirs.append(im_dir)
+        # Restrict to a single form; raises IndexError when author_number is past the end of the list.
+        if author_number >= 0:
+            full_ann_files = [full_ann_files[author_number]]
+            im_dirs = [im_dirs[author_number]]
+            author_id = im_dirs[0].split('/')[-1]
+        lables_to_skip = ['.', '', ',', '"', "'", '(', ')', ':', ';', '!']
+        for i, anno_file in enumerate(full_ann_files):
+            with open(anno_file) as f:
+                try:
+                    line = f.read()
+                    annotation_content = xmltodict.parse(line)
+                    lines = annotation_content['form']['handwritten-part']['line']
+                    if words:
+                        # Flatten the per-line word lists into one list of word entries.
+                        lines_list = []
+                        for j in range(len(lines)):
+                            lines_list.extend(lines[j]['word'])
+                        lines = lines_list
+                except:
+                    # NOTE(review): after a failure `lines` still holds the previous file's value (or is undefined on the first file), yet the loop below runs anyway.
+                    print('line is not decodable')
+                for line in lines:
+                    try:
+                        label = html.unescape(line['@text'])
+                    except:
+                        continue
+                    if remove_punc and label in lables_to_skip:
+                        continue
+                    id = line['@id']
+                    imagePath = os.path.join(im_dirs[i], id + '.png')
+                    image_path_list.append(imagePath)
+                    label_list.append(label)
+    elif dataset=='RIMES':
+        # NOTE(review): with `mode` forced to 'all' none of these branches run, so `gt_file` / `images_dir` are unbound below.
+        if mode=='tr':
+            images_dir = os.path.join(root_dir, 'orig','training_WR')
+            gt_file = os.path.join(root_dir, 'orig',
+                               'groundtruth_training_icdar2011.txt')
+        elif mode=='te':
+            images_dir = os.path.join(root_dir, 'orig', 'testdataset_ICDAR')
+            gt_file = os.path.join(root_dir, 'orig',
+                                       'ground_truth_test_icdar2011.txt')
+        elif mode=='val':
+            images_dir = os.path.join(root_dir, 'orig', 'valdataset_ICDAR')
+            gt_file = os.path.join(root_dir, 'orig',
+                                       'ground_truth_validation_icdar2011.txt')
+        with open(gt_file, 'r') as f:
+            lines = f.readlines()
+        # Each ground-truth line is "<image> <label>"; lines without a space are ignored.
+        image_path_list = [os.path.join(images_dir, line.split(' ')[0]) for line in lines if len(line.split(' ')) > 1]
+        label_list = [line.split(' ')[1][:-1] for line in lines if len(line.split(' ')) > 1]
+    return image_path_list, label_list, output_dir, author_id
+def createDataset(IMG_DATA, image_path_list, label_list, outputPath, mode, author_id, remove_punc, resize, imgH, init_gap, h_gap, charminW, charmaxW, discard_wide, discard_narr, labeled):
+    """Resize the listed images, write them to an LMDB store and to IMG_DATA.
+
+    Args:
+        IMG_DATA: Dict mapping author id to a list of {'img', 'label'} dicts; updated in place.
+        image_path_list, label_list: Parallel lists of image paths and labels.
+        outputPath: Base directory; a configuration-dependent suffix is appended.
+        mode: Split name, used in the output path.
+        author_id: Key under which the images are stored in IMG_DATA.
+        remove_punc: Only affects the output path suffix here.
+        resize: 'charResize', 'keepRatio' or 'noResize'.
+        imgH: Target image height.
+        init_gap, h_gap: Horizontal start gap and vertical gap in pixels.
+        charminW, charmaxW: Per-character width range for 'charResize'.
+        discard_wide, discard_narr: Whether to drop too-wide / too-narrow images.
+        labeled: Whether labels are written to the LMDB store.
+
+    Returns:
+        The updated IMG_DATA dict.
+    """
+    assert (len(image_path_list) == len(label_list))
+    nSamples = len(image_path_list)
+    # Build the output directory name from the resize configuration.
+    # NOTE(review): the '_NoDiscard_wide' suffix is also used for `discard_narr`; presumably '_NoDiscard_narr' was meant.
+    outputPath = outputPath + (resize=='charResize') * ('/h%schar%sto%s/'%(imgH, charminW, charmaxW)) + (resize=='keepRatio') * ('/h%s/'%(imgH)) \
+                 + (resize=='noResize') * ('/noResize/') + (author_id!='None') * ('single_authors/'+author_id+'/' ) \
+                 + mode + (resize!='noResize') * (('_initGap%s'%(init_gap)) * (init_gap>0) + ('_hGap%s'%(h_gap)) * (h_gap>0) \
+                 + '_NoDiscard_wide' * (not discard_wide) + '_NoDiscard_wide' * (not discard_narr))+'_unlabeld' * (not labeled) +\
+                 (('IAM' in outputPath) and remove_punc) *'_removePunc'
+    # NOTE(review): `outputPath_` is computed but never read in this function.
+    outputPath_ = '/root/Handwritten_data/IAM/authors' + (resize=='charResize') * ('/h%schar%sto%s/'%(imgH, charminW, charmaxW)) + (resize=='keepRatio') * ('/h%s/'%(imgH)) \
+                 + (resize=='noResize') * ('/noResize/') + (author_id!='None') * ('single_authors/'+author_id+'/' ) \
+                 + mode + (resize!='noResize') * (('_initGap%s'%(init_gap)) * (init_gap>0) + ('_hGap%s'%(h_gap)) * (h_gap>0) \
+                 + '_NoDiscard_wide' * (not discard_wide) + '_NoDiscard_wide' * (not discard_narr))+'_unlabeld' * (not labeled) +\
+                 (('IAM' in outputPath) and remove_punc) *'_removePunc'
+    print(outputPath)
+    # Start from an empty directory: an existing output directory is deleted first.
+    if os.path.exists(outputPath):
+        shutil.rmtree(outputPath)
+        os.makedirs(outputPath)
+    else:
+        os.makedirs(outputPath)
+    env = lmdb.open(outputPath, map_size=1099511627776)
+    cache = {}
+    cnt = 1
+    # NOTE(review): this overrides the `discard_wide` argument after it was used for the path suffix, so wide images are never discarded.
+    discard_wide = False
+    for i in tqdm(range(nSamples)):
+        imagePath = image_path_list[i]
+        #author_id = image_path_list[i].split('/')[-2]
+        label = label_list[i]
+        if not os.path.exists(imagePath):
+            print('%s does not exist' % imagePath)
+            continue
+        try:
+            im = Image.open(imagePath)
+        except:
+            continue
+        if resize in ['charResize', 'keepRatio']:
+            width, height = im.size
+            new_height = imgH - (h_gap * 2)
+            len_word = len(label)
+            # Width after scaling the image to height imgH with the aspect ratio kept.
+            width = int(width * imgH / height)
+            new_width = width
+            if resize=='charResize':
+                # NOTE(review): an empty label makes len_word 0 and the divisions below raise ZeroDivisionError.
+                if (width/len_word > (charmaxW-1)) or (width/len_word < charminW) :
+                    if discard_wide and width/len_word > 3*((charmaxW-1)):
+                        print('%s has a width larger than max image width' % imagePath)
+                        continue
+                    # The `else` below pairs with this `if`: any out-of-range image that is not discarded gets a random per-character width.
+                    if discard_narr and (width / len_word) < (charminW/3):
+                        print('%s has a width smaller than min image width' % imagePath)
+                        continue
+                    else:
+                        new_width = len_word * random.randrange(charminW, charmaxW)
+            # reshape the image to the new dimensions
+            im = im.resize((new_width, new_height))
+            # append with 256 to add left, upper and lower white edges
+            init_w = int(random.normalvariate(init_gap, init_gap / 2))
+            new_im = Image.new("RGB", (new_width+init_gap, imgH), color=(256,256,256))
+            new_im.paste(im, (abs(init_w), h_gap))
+            im = new_im
+        # Keep the processed image in memory, grouped by author.
+        if author_id in IMG_DATA.keys():
+            IMG_DATA[author_id].append({'img':im, 'label':label})
+        else:
+            IMG_DATA[author_id] = []
+            IMG_DATA[author_id].append({'img':im, 'label':label})
+        imgByteArr = io.BytesIO()
+        #im.save(os.path.join(outputPath, 'IMG_'+str(cnt)+'_'+str(label)+'.jpg'))
+        im.save(imgByteArr, format='tiff')
+        wordBin = imgByteArr.getvalue()
+        imageKey = 'image-%09d' % cnt
+        labelKey = 'label-%09d' % cnt
+        cache[imageKey] = wordBin
+        if labeled:
+            cache[labelKey] = label
+        # Flush the pending entries to LMDB every 1000 samples.
+        if cnt % 1000 == 0:
+            writeCache(env, cache)
+            cache = {}
+            print('Written %d / %d' % (cnt, nSamples))
+        cnt += 1
+    # cnt started at 1, so the number of stored samples is cnt - 1.
+    nSamples = cnt - 1
+    cache['num-samples'] = str(nSamples)
+    writeCache(env, cache)
+    env.close()
+    print('Created dataset with %d samples' % nSamples)
+    return IMG_DATA
+def createDict(label_list, top_dir, dataset, mode, words, remove_punc):
+    lex_name = dataset+'_' + mode + (dataset in ['IAM','RIMES'])*('_words' * words) + (dataset=='IAM') * ('_removePunc' * remove_punc)
+    all_words = '-'.join(label_list).split('-')
+    unique_words = []
+    words = []
+    for x in tqdm(all_words):
+        if x!='' and x!=' ':
+            words.append(x)
+            if x not in unique_words:
+                unique_words.append(x)
+    print(len(words))
+    print(len(unique_words))
+    with open(os.path.join(top_dir, 'Lexicon', lex_name+'_stratified.txt'), "w") as file:
+        file.write("\n".join(unique_words))
+    file.close()
+    with open(os.path.join(top_dir, 'Lexicon', lex_name + '_NOTstratified.txt'), "w") as file:
+        file.write("\n".join(words))
+    file.close()
+def printAlphabet(label_list):
+    # get all unique alphabets - ignoring alphabet longer than one char
+    all_chars = ''.join(label_list)
+    unique_chars = []
+    for x in all_chars:
+        if x not in unique_chars and len(x) == 1:
+            unique_chars.append(x)
+    # for unique_char in unique_chars:
+    print(''.join(unique_chars))
+# Script entry point: builds per-form IAM image data and regroups it by writer id.
+if __name__ == '__main__':
+    # Index files listing "<author>,<file_id> <label>" entries; opened relative to the working directory.
+    TRAIN_IDX = 'gan.iam.tr_va.gt.filter27'
+    TEST_IDX =  'gan.iam.test.gt.filter27'
+    IAM_WORD_DATASET_PATH = '../../data/IAM/nfs/users/ext_ankan.bhunia/data/Handwritten_data/IAM/wordImages/'
+    XMLS_PATH = '../../data/IAM/nfs/users/ext_ankan.bhunia/data/Handwritten_data/IAM/xmls/'
+    # Map word-image id (file name without its 4-character extension) to the image path.
+    word_paths = {i.split('/')[-1][:-4]:i for i in glob.glob(IAM_WORD_DATASET_PATH + '*/*/*.png')}
+    # Map form id (XML file name without extension) to the form's writer id.
+    id_to_wid = {i.split('/')[-1][:-4]:xmltodict.parse(open(i).read())['form']['@writer-id'] for i in glob.glob(XMLS_PATH+'/**')}
+    trainslist = [i[:-1] for i in open(TRAIN_IDX, 'r').readlines()]
+    testslist = [i[:-1] for i in open(TEST_IDX, 'r').readlines()]
+    # NOTE(review): this train/test dict is filled below but replaced by `dict_ = {}` before it is ever read.
+    dict_ = {'train':{}, 'test':{}}
+    for i in trainslist:
+        author_id = i.split(',')[0]
+        file_id, string = i.split(',')[1].split(' ')
+        file_path = word_paths[file_id]
+        if author_id in dict_['train']:
+            dict_['train'][author_id].append({'path':file_path, 'label':string})
+        else:
+            dict_['train'][author_id] =  [{'path':file_path, 'label':string}]
+    for i in testslist:
+        author_id = i.split(',')[0]
+        file_id, string = i.split(',')[1].split(' ')
+        file_path = word_paths[file_id]
+        if author_id in dict_['test']:
+            dict_['test'][author_id].append({'path':file_path, 'label':string})
+        else:
+            dict_['test'][author_id] =  [{'path':file_path, 'label':string}]
+    create_Dict = True # create a dictionary of the generated dataset
+    dataset = 'IAM'     #CVL/IAM/RIMES/gw
+    mode = 'all'        # tr/te/val/va1/va2/all
+    labeled = True
+    top_dir = '../../data/IAM/nfs/users/ext_ankan.bhunia/data/Handwritten_data/'
+    # parameter relevant for IAM/RIMES:
+    words = True        # use words images, otherwise use lines
+    #parameters relevant for IAM:
+    author_number = -1  # use only images of a specific writer. If the value is -1, use all writers, otherwise use the index of this specific writer
+    remove_punc = True  # remove images which include only one punctuation mark from the list ['.', '', ',', '"', "'", '(', ')', ':', ';', '!']
+    resize = 'charResize'  # charResize|keepRatio|noResize - type of resize,
+                        # char - resize so that each character's width will be in a specific range (inside this range the width will be chosen randomly),
+                        # keepRatio - resize to a specific image height while keeping the height-width aspect-ratio the same.
+                        # noResize - do not resize the image
+    imgH = 32           # height of the resized image
+    init_gap = 0        # insert a gap before the beginning of the text with this number of pixels
+    charmaxW = 17       # The maximum character width
+    charminW = 16       # The minimum character width
+    h_gap = 0           # Insert a gap below and above the text
+    discard_wide = True # Discard images which have a character width 3 times larger than the maximum allowed character size (instead of resizing them) - this helps discard outlier images
+    discard_narr = True # Discard images which have a character width 3 times smaller than the minimum allowed charcter size.
+    IMG_DATA = {}
+    # NOTE(review): the loop index is passed as `author_number` (the `author_number` variable above is unused), and
+    # create_img_label_list indexes its form list with it, so this loop ends with an IndexError once the index
+    # passes the number of forms; no other exit is visible. IMG_DATA is therefore only available interactively -- confirm.
+    for idx_auth in range(1669999):
+        print ('Processing '+ str(idx_auth))
+        image_path_list, label_list, outputPath, author_id = create_img_label_list(top_dir,dataset, mode, words, idx_auth, remove_punc)
+        IMG_DATA[author_id] = []
+        # in a previous version we also cut the white edges of the image to keep a tight rectangle around the word but it
+        # seems in all the datasets we use this is already the case so I removed it. If there are problems maybe we should add this back.
+        IMG_DATA = createDataset(IMG_DATA, image_path_list, label_list, outputPath, mode, author_id, remove_punc, resize, imgH, init_gap, h_gap, charminW, charmaxW, discard_wide, discard_narr, labeled)
+        #if create_Dict:
+        #    createDict(label_list, top_dir, dataset, mode, words, remove_punc)
+        #printAlphabet(label_list)
+    import pickle
+    # Regroup the per-form image lists by writer id.
+    dict_ = {}
+    for id_ in IMG_DATA.keys():
+        author_id = id_to_wid[id_]
+        if author_id in dict_.keys():
+            dict_[author_id].extend(IMG_DATA[id_])
+        else:
+            dict_[author_id] = IMG_DATA[id_]
+    # NOTE(review): the dump below is disabled, so the regrouped dict is never written to disk.
+    #pickle.dump(IMG_DATA, '/root/IAM')

files/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

files/english_words.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

files/example_data/style-1/im_0.png ADDED Viewed

files/example_data/style-1/im_1.png ADDED Viewed

files/example_data/style-1/im_10.png ADDED Viewed

files/example_data/style-1/im_11.png ADDED Viewed

files/example_data/style-1/im_12.png ADDED Viewed

files/example_data/style-1/im_13.png ADDED Viewed

files/example_data/style-1/im_14.png ADDED Viewed

files/example_data/style-1/im_2.png ADDED Viewed

files/example_data/style-1/im_3.png ADDED Viewed

files/example_data/style-1/im_4.png ADDED Viewed

files/example_data/style-1/im_5.png ADDED Viewed

files/example_data/style-1/im_6.png ADDED Viewed

files/example_data/style-1/im_7.png ADDED Viewed

files/example_data/style-1/im_8.png ADDED Viewed

files/example_data/style-1/im_9.png ADDED Viewed

files/example_data/style-10/im_0.png ADDED Viewed

files/example_data/style-10/im_1.png ADDED Viewed

files/example_data/style-10/im_10.png ADDED Viewed

files/example_data/style-10/im_11.png ADDED Viewed

files/example_data/style-10/im_12.png ADDED Viewed

files/example_data/style-10/im_13.png ADDED Viewed

files/example_data/style-10/im_14.png ADDED Viewed

files/example_data/style-10/im_2.png ADDED Viewed

files/example_data/style-10/im_3.png ADDED Viewed

files/example_data/style-10/im_4.png ADDED Viewed

files/example_data/style-10/im_5.png ADDED Viewed

files/example_data/style-10/im_6.png ADDED Viewed

files/example_data/style-10/im_7.png ADDED Viewed

files/example_data/style-10/im_8.png ADDED Viewed

files/example_data/style-10/im_9.png ADDED Viewed

files/example_data/style-102/im_0.png ADDED Viewed

files/example_data/style-102/im_1.png ADDED Viewed

files/example_data/style-102/im_10.png ADDED Viewed

files/example_data/style-102/im_11.png ADDED Viewed

files/example_data/style-102/im_12.png ADDED Viewed

files/example_data/style-102/im_13.png ADDED Viewed

files/example_data/style-102/im_14.png ADDED Viewed

files/example_data/style-102/im_2.png ADDED Viewed

files/example_data/style-102/im_3.png ADDED Viewed

files/example_data/style-102/im_4.png ADDED Viewed