import gradio as gr import tensorflow as tf import numpy as np import cv2 from PIL import Image from huggingface_hub import from_pretrained_keras def resize_image(img_in,input_height,input_width): return cv2.resize( img_in, ( input_width,input_height) ,interpolation=cv2.INTER_NEAREST) def otsu_copy_binary(img): img_r=np.zeros((img.shape[0],img.shape[1],3)) img1=img[:,:,0] retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU) img_r[:,:,0]=threshold1 img_r[:,:,1]=threshold1 img_r[:,:,2]=threshold1 return img_r def visualize_model_output(prediction, img, model_name): if model_name == "SBB/eynollah-binarization": prediction = prediction * -1 prediction = prediction + 1 added_image = prediction * 255 else: unique_classes = np.unique(prediction[:,:,0]) rgb_colors = {'0' : [255, 255, 255], '1' : [255, 0, 0], '2' : [255, 125, 0], '3' : [255, 0, 125], '4' : [125, 125, 125], '5' : [125, 125, 0], '6' : [0, 125, 255], '7' : [0, 125, 0], '8' : [125, 125, 125], '9' : [0, 125, 255], '10' : [125, 0, 125], '11' : [0, 255, 0], '12' : [0, 0, 255], '13' : [0, 255, 255], '14' : [255, 125, 125], '15' : [255, 0, 255]} output = np.zeros(prediction.shape) for unq_class in unique_classes: rgb_class_unique = rgb_colors[str(int(unq_class))] output[:,:,0][prediction[:,:,0]==unq_class] = rgb_class_unique[0] output[:,:,1][prediction[:,:,0]==unq_class] = rgb_class_unique[1] output[:,:,2][prediction[:,:,0]==unq_class] = rgb_class_unique[2] img = resize_image(img, output.shape[0], output.shape[1]) output = output.astype(np.int32) img = img.astype(np.int32) added_image = cv2.addWeighted(img,0.5,output,0.1,0) return added_image def return_num_columns(img): model_classifier = from_pretrained_keras("SBB/eynollah-column-classifier") img_1ch = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) img_1ch = img_1ch / 255.0 img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) img_in[0, :, :, 0] = img_1ch[:, :] img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] label_p_pred = model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 return num_col def return_scaled_image(img, num_col, width_early, model_name): if model_name == "SBB/eynollah-main-regions-aug-rotation" or "SBB/eynollah-main-regions-aug-scaling" or "SBB/eynollah-main-regions-ensembled" or "SBB/eynollah-textline" or "SBB/eynollah-binarization": if num_col == 1 and width_early < 1100: img_w_new = 2000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000) elif num_col == 1 and width_early >= 2500: img_w_new = 2000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 2000) elif num_col == 1 and width_early >= 1100 and width_early < 2500: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 2 and width_early < 2000: img_w_new = 2400 img_h_new = int(img.shape[0] / float(img.shape[1]) * 2400) elif num_col == 2 and width_early >= 3500: img_w_new = 2400 img_h_new = int(img.shape[0] / float(img.shape[1]) * 2400) elif num_col == 2 and width_early >= 2000 and width_early < 3500: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 3 and width_early < 2000: img_w_new = 3000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 3000) elif num_col == 3 and width_early >= 4000: img_w_new = 3000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 3000) elif num_col == 3 and width_early >= 2000 and width_early < 4000: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 4 and width_early < 2500: img_w_new = 4000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 4000) elif num_col == 4 and width_early >= 5000: img_w_new = 4000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 4000) elif num_col == 4 and width_early >= 2500 and width_early < 5000: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 5 and width_early < 3700: img_w_new = 5000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 5000) elif num_col == 5 and width_early >= 7000: img_w_new = 5000 img_h_new = int(img.shape[0] / float(img.shape[1]) * 5000) elif num_col == 5 and width_early >= 3700 and width_early < 7000: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) elif num_col == 6 and width_early < 4500: img_w_new = 6500 # 5400 img_h_new = int(img.shape[0] / float(img.shape[1]) * 6500) else: img_w_new = width_early img_h_new = int(img.shape[0] / float(img.shape[1]) * width_early) img_new = resize_image(img, img_h_new, img_w_new) elif model_name=="SBB/eynollah-main-regions" or "SBB/eynollah-textline_light": if num_col == 1: img_w_new = 1000 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) elif num_col == 2: img_w_new = 1500 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) elif num_col == 3: img_w_new = 2000 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) elif num_col == 4: img_w_new = 2500 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) elif num_col == 5: img_w_new = 3000 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) else: img_w_new = 4000 img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) img_new = otsu_copy_binary(img_resized) return img_new def do_prediction(model_name, img): img_org = np.copy(img) model = from_pretrained_keras(model_name) match model_name: # numerical output case "SBB/eynollah-column-classifier": img_1ch = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) img_1ch = img_1ch / 255.0 img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) img_in[0, :, :, 0] = img_1ch[:, :] img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] label_p_pred = model.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 return "Found {} columns".format(num_col), None case "SBB/eynollah-page-extraction": img_height_model = model.layers[len(model.layers) - 1].output_shape[1] img_width_model = model.layers[len(model.layers) - 1].output_shape[2] img_h_page = img.shape[0] img_w_page = img.shape[1] img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) imgray = cv2.cvtColor(prediction_true, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) #thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if len(contours)>0: cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] x, y, w, h = cv2.boundingRect(cnt) if x <= 30: w += x x = 0 if (img_org.shape[1] - (x + w)) <= 30: w = w + (img_org.shape[1] - (x + w)) if y <= 30: h = h + y y = 0 if (img_org.shape[0] - (y + h)) <= 30: h = h + (img_org.shape[0] - (y + h)) box = [x, y, w, h] img_border = np.zeros((prediction_true.shape[0],prediction_true.shape[1])) img_border[y:y+h, x:x+w] = 1 img_border = np.repeat(img_border[:, :, np.newaxis], 3, axis=2) else: img_border = np.zeros((prediction_true.shape[0],prediction_true.shape[1])) img_border[:, :] = 1 img_border = np.repeat(img_border[:, :, np.newaxis], 3, axis=2) return "No numerical output", visualize_model_output(img_border,img_org, model_name) # bitmap output case "SBB/eynollah-binarization" | "SBB/eynollah-textline" | "SBB/eynollah-textline_light" | "SBB/eynollah-enhancement" | "SBB/eynollah-tables" | "SBB/eynollah-main-regions" | "SBB/eynollah-main-regions-aug-rotation" | "SBB/eynollah-main-regions-aug-scaling" | "SBB/eynollah-main-regions-ensembled" | "SBB/eynollah-full-regions-1column" | "SBB/eynollah-full-regions-3pluscolumn": img_height_model=model.layers[len(model.layers)-1].output_shape[1] img_width_model=model.layers[len(model.layers)-1].output_shape[2] n_classes=model.layers[len(model.layers)-1].output_shape[3] img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] num_col_classifier = return_num_columns(img) width_early = img.shape[1] img = return_scaled_image(img, num_col_classifier, width_early, model_name) if img.shape[0] < img_height_model: img = resize_image(img, img_height_model, img.shape[1]) if img.shape[1] < img_width_model: img = resize_image(img, img.shape[0], img_width_model) marginal_of_patch_percent = 0.1 margin = int(marginal_of_patch_percent * img_height_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin img = img / float(255.0) img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] prediction_true = np.zeros((img_h, img_w, 3)) mask_true = np.zeros((img_h, img_w)) nxf = img_w / float(width_mid) nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) for i in range(nxf): for j in range(nyf): if i == 0: index_x_d = i * width_mid index_x_u = index_x_d + img_width_model else: index_x_d = i * width_mid index_x_u = index_x_d + img_width_model if j == 0: index_y_d = j * height_mid index_y_u = index_y_d + img_height_model else: index_y_d = j * height_mid index_y_u = index_y_d + img_height_model if index_x_u > img_w: index_x_u = img_w index_x_d = img_w - img_width_model if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), verbose=0) if model_name == "SBB/eynollah-enhancement": seg_color = label_p_pred[0, :, :, :] seg_color = seg_color * 255 else: seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) if i == 0 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color elif i == 0 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color elif i == 0 and j != 0 and j != nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j != 0 and j != nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color elif i != 0 and i != nxf - 1 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color elif i != 0 and i != nxf - 1 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color else: seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color if model_name == "SBB/eynollah-enhancement": prediction_true = prediction_true.astype(int) return "No numerical output", prediction_true else: prediction_true = prediction_true.astype(np.uint8) return "No numerical output", visualize_model_output(prediction_true,img_org, model_name) # catch-all (we should not reach this) case _: return None, None title = "Welcome to the Eynollah Demo page! 👁️" description = """
This Space demonstrates the functionality of various Eynollah models developed at SBB.

The Eynollah suite introduces an end-to-end pipeline to extract layout, text lines and reading order for historic documents, where the output can be used as an input for OCR engines. Please keep in mind that with this demo you can just use one of the 13 sub-modules of the whole Eynollah system at a time.
Resources for more information:
""" iface = gr.Interface( title=title, description=description, fn=do_prediction, inputs=[ gr.Dropdown([ "SBB/eynollah-binarization", "SBB/eynollah-enhancement", "SBB/eynollah-page-extraction", "SBB/eynollah-column-classifier", "SBB/eynollah-tables", "SBB/eynollah-textline", "SBB/eynollah-textline_light", "SBB/eynollah-main-regions", "SBB/eynollah-main-regions-aug-rotation", "SBB/eynollah-main-regions-aug-scaling", "SBB/eynollah-main-regions-ensembled", "SBB/eynollah-full-regions-1column", "SBB/eynollah-full-regions-3pluscolumn" ], label="Select one model of the Eynollah suite 👇", info=""), gr.Image() ], outputs=[ gr.Textbox(label="Output of model (numerical or bitmap) ⬇️"), gr.Image() ], #examples=[['example-1.jpg']] ) iface.launch()