import gradio as gr
import tensorflow as tf
import numpy as np
import cv2
from PIL import Image
from huggingface_hub import from_pretrained_keras


def resize_image(img_in, input_height, input_width):
    # cv2.resize expects (width, height).
    return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)


def otsu_copy_binary(img):
    # Binarize the first channel with Otsu's method and replicate it to all three channels.
    img_r = np.zeros((img.shape[0], img.shape[1], 3))
    _, threshold1 = cv2.threshold(img[:, :, 0], 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    img_r[:, :, 0] = threshold1
    img_r[:, :, 1] = threshold1
    img_r[:, :, 2] = threshold1
    return img_r


def visualize_model_output(prediction, img):
    # Fixed RGB color per predicted class index.
    unique_classes = np.unique(prediction[:, :, 0])
    rgb_colors = {
        '0':  [255, 255, 255],
        '1':  [255, 0, 0],
        '2':  [255, 125, 0],
        '3':  [255, 0, 125],
        '4':  [125, 125, 125],
        '5':  [125, 125, 0],
        '6':  [0, 125, 255],
        '7':  [0, 125, 0],
        '8':  [125, 125, 125],
        '9':  [0, 125, 255],
        '10': [125, 0, 125],
        '11': [0, 255, 0],
        '12': [0, 0, 255],
        '13': [0, 255, 255],
        '14': [255, 125, 125],
        '15': [255, 0, 255],
    }

    output = np.zeros(prediction.shape)
    for unq_class in unique_classes:
        rgb_class_unique = rgb_colors[str(int(unq_class))]
        output[:, :, 0][prediction[:, :, 0] == unq_class] = rgb_class_unique[0]
        output[:, :, 1][prediction[:, :, 0] == unq_class] = rgb_class_unique[1]
        output[:, :, 2][prediction[:, :, 0] == unq_class] = rgb_class_unique[2]

    # Blend the color-coded prediction over the input image (resized to the prediction's size).
    img = resize_image(img, output.shape[0], output.shape[1])
    output = output.astype(np.int32)
    img = img.astype(np.int32)
    return cv2.addWeighted(img, 0.5, output, 0.1, 0)


def return_num_columns(img):
    # Predict the number of text columns (1-6) with the dedicated column classifier.
    model_classifier = from_pretrained_keras("SBB/eynollah-column-classifier")
    img_1ch = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 255.0
    img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
    img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
    img_in[0, :, :, 0] = img_1ch[:, :]
    img_in[0, :, :, 1] = img_1ch[:, :]
    img_in[0, :, :, 2] = img_1ch[:, :]
    label_p_pred = model_classifier.predict(img_in, verbose=0)
    return np.argmax(label_p_pred[0]) + 1


def do_prediction(model_name, img):
    img_org = np.copy(img)
    model = from_pretrained_keras(model_name)

    match model_name:
        # numerical output
        case "SBB/eynollah-column-classifier":
            img_1ch = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 255.0
            img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
            img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
            img_in[0, :, :, 0] = img_1ch[:, :]
            img_in[0, :, :, 1] = img_1ch[:, :]
            img_in[0, :, :, 2] = img_1ch[:, :]
            label_p_pred = model.predict(img_in, verbose=0)
            num_col = np.argmax(label_p_pred[0]) + 1
            return "Found {} columns".format(num_col), None

        # bitmap output
        case ("SBB/eynollah-binarization" | "SBB/eynollah-page-extraction"
              | "SBB/eynollah-textline" | "SBB/eynollah-textline_light"
              | "SBB/eynollah-enhancement" | "SBB/eynollah-tables"
              | "SBB/eynollah-main-regions" | "SBB/eynollah-main-regions-aug-rotation"
              | "SBB/eynollah-main-regions-aug-scaling" | "SBB/eynollah-main-regions-ensembled"
              | "SBB/eynollah-full-regions-1column" | "SBB/eynollah-full-regions-3pluscolumn"):
            # The model's input patch size is taken from its last layer's output shape.
            img_height_model = model.layers[-1].output_shape[1]
            img_width_model = model.layers[-1].output_shape[2]

            # Rescale the page to a target width that depends on the detected number of
            # columns, keeping the aspect ratio.
            num_col_classifier = return_num_columns(img)
            width_by_columns = {1: 1000, 2: 1500, 3: 2000, 4: 2500, 5: 3000}
            img_w_new = width_by_columns.get(num_col_classifier, 4000)
            img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)

            img_resized = resize_image(img, img_h_new, img_w_new)
            img = otsu_copy_binary(img_resized)

            # Make sure the image is at least one patch in each dimension.
            if img.shape[0] < img_height_model:
                img = resize_image(img, img_height_model, img.shape[1])
            if img.shape[1] < img_width_model:
                img = resize_image(img, img.shape[0], img_width_model)
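            # The segmentation model predicts fixed-size patches, so the page is processed
            # as a grid of overlapping tiles: adjacent tiles overlap by 2 * margin pixels,
            # and after prediction a margin-wide strip is cropped from every interior tile
            # edge so that only the more reliable tile centers get stitched together.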
            marginal_of_patch_percent = 0.1
            margin = int(marginal_of_patch_percent * img_height_model)
            width_mid = img_width_model - 2 * margin
            height_mid = img_height_model - 2 * margin

            img = img / 255.0
            img = img.astype(np.float16)
            img_h = img.shape[0]
            img_w = img.shape[1]
            prediction_true = np.zeros((img_h, img_w, 3))

            # Number of tiles in each direction, rounded up.
            nxf = img_w / float(width_mid)
            nyf = img_h / float(height_mid)
            nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
            nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)

            for i in range(nxf):
                for j in range(nyf):
                    index_x_d = i * width_mid
                    index_x_u = index_x_d + img_width_model
                    index_y_d = j * height_mid
                    index_y_u = index_y_d + img_height_model

                    # Clamp the last tiles to the image border.
                    if index_x_u > img_w:
                        index_x_u = img_w
                        index_x_d = img_w - img_width_model
                    if index_y_u > img_h:
                        index_y_u = img_h
                        index_y_d = img_h - img_height_model

                    img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
                    label_p_pred = model.predict(
                        img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]),
                        verbose=0)
                    seg = np.argmax(label_p_pred, axis=3)[0]
                    seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)

                    # Crop a margin from every edge that faces a neighbouring tile, but keep
                    # the full prediction at the outer image borders.
                    mgn_top = 0 if j == 0 else margin
                    mgn_bottom = 0 if j == nyf - 1 else margin
                    mgn_left = 0 if i == 0 else margin
                    mgn_right = 0 if i == nxf - 1 else margin

                    seg_color = seg_color[mgn_top:seg_color.shape[0] - mgn_bottom,
                                          mgn_left:seg_color.shape[1] - mgn_right, :]
                    prediction_true[index_y_d + mgn_top:index_y_u - mgn_bottom,
                                    index_x_d + mgn_left:index_x_u - mgn_right, :] = seg_color

            prediction_true = prediction_true.astype(np.uint8)
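            # Overlay the color-coded class map on the original page for display;
            # visualize_model_output resizes the original image to the prediction's size.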
            return "No numerical output", visualize_model_output(prediction_true, img_org)

        # catch-all (we should not reach this)
        case _:
            return None, None


title = "Welcome to the Eynollah Demo page! 👁️"
description = """
This Space demonstrates the functionality of various Eynollah models developed at SBB.

The Eynollah suite provides an end-to-end pipeline that extracts layout, text lines and reading order from historic documents; its output can be used as input for OCR engines. Please keep in mind that this demo lets you use only one of the 13 sub-modules of the whole Eynollah system at a time.
Resources for more information:
""" iface = gr.Interface( title=title, description=description, fn=do_prediction, inputs=[ gr.Dropdown([ "SBB/eynollah-binarization", "SBB/eynollah-enhancement", "SBB/eynollah-page-extraction", "SBB/eynollah-column-classifier", "SBB/eynollah-tables", "SBB/eynollah-textline", "SBB/eynollah-textline_light", "SBB/eynollah-main-regions", "SBB/eynollah-main-regions-aug-rotation", "SBB/eynollah-main-regions-aug-scaling", "SBB/eynollah-main-regions-ensembled", "SBB/eynollah-full-regions-1column", "SBB/eynollah-full-regions-3pluscolumn" ], label="Select one model of the Eynollah suite 👇", info=""), gr.Image() ], outputs=[ gr.Textbox(label="Output of model (numerical or bitmap) ⬇️"), gr.Image() ], #examples=[['example-1.jpg']] ) iface.launch()