import numpy as np | |
# def detect_para(bbox_dict): | |
# alpha1 = 0.2 | |
# alpha2 = 0.7 | |
# beta1 = 0.4 | |
# data = bbox_dict | |
# word_crops = list(data.keys()) | |
# for i in word_crops: | |
# data[i]["x1"], data[i]["y1"], data[i]["x2"], data[i]["y2"] = data[i]["bbox"] | |
# data[i]["xc"] = (data[i]["x1"] + data[i]["x2"]) / 2 | |
# data[i]["yc"] = (data[i]["y1"] + data[i]["y2"]) / 2 | |
# data[i]["w"] = data[i]["x2"] - data[i]["x1"] | |
# data[i]["h"] = data[i]["y2"] - data[i]["y1"] | |
# patch_info = {} | |
# while word_crops: | |
# img_name = word_crops[0].split("_")[0] | |
# word_crop_collection = [ | |
# word_crop for word_crop in word_crops if word_crop.startswith(img_name) | |
# ] | |
# centroids = {} | |
# lines = [] | |
# img_word_crops = word_crop_collection.copy() | |
# para = [] | |
# while img_word_crops: | |
# clusters = [] | |
# para_words_group = [ | |
# img_word_crops[0], | |
# ] | |
# added = [ | |
# img_word_crops[0], | |
# ] | |
# img_word_crops.remove(img_word_crops[0]) | |
# ## determining the paragraph | |
# while added: | |
# word_crop = added.pop() | |
# for i in range(len(img_word_crops)): | |
# word_crop_ = img_word_crops[i] | |
# if ( | |
# abs(data[word_crop_]["yc"] - data[word_crop]["yc"]) | |
# < data[word_crop]["h"] * alpha1 | |
# ): | |
# if data[word_crop]["xc"] > data[word_crop_]["xc"]: | |
# if (data[word_crop]["x1"] - data[word_crop_]["x2"]) < data[ | |
# word_crop | |
# ]["h"] * alpha2: | |
# para_words_group.append(word_crop_) | |
# added.append(word_crop_) | |
# else: | |
# if (data[word_crop_]["x1"] - data[word_crop]["x2"]) < data[ | |
# word_crop | |
# ]["h"] * alpha2: | |
# para_words_group.append(word_crop_) | |
# added.append(word_crop_) | |
# else: | |
# if data[word_crop]["yc"] > data[word_crop_]["yc"]: | |
# if (data[word_crop]["y1"] - data[word_crop_]["y2"]) < data[ | |
# word_crop | |
# ]["h"] * beta1 and ( | |
# ( | |
# (data[word_crop_]["x1"] < data[word_crop]["x2"]) | |
# and (data[word_crop_]["x1"] > data[word_crop]["x1"]) | |
# ) | |
# or ( | |
# (data[word_crop_]["x2"] < data[word_crop]["x2"]) | |
# and (data[word_crop_]["x2"] > data[word_crop]["x1"]) | |
# ) | |
# or ( | |
# (data[word_crop]["x1"] > data[word_crop_]["x1"]) | |
# and (data[word_crop]["x2"] < data[word_crop_]["x2"]) | |
# ) | |
# ): | |
# para_words_group.append(word_crop_) | |
# added.append(word_crop_) | |
# else: | |
# if (data[word_crop_]["y1"] - data[word_crop]["y2"]) < data[ | |
# word_crop | |
# ]["h"] * beta1 and ( | |
# ( | |
# (data[word_crop_]["x1"] < data[word_crop]["x2"]) | |
# and (data[word_crop_]["x1"] > data[word_crop]["x1"]) | |
# ) | |
# or ( | |
# (data[word_crop_]["x2"] < data[word_crop]["x2"]) | |
# and (data[word_crop_]["x2"] > data[word_crop]["x1"]) | |
# ) | |
# or ( | |
# (data[word_crop]["x1"] > data[word_crop_]["x1"]) | |
# and (data[word_crop]["x2"] < data[word_crop_]["x2"]) | |
# ) | |
# ): | |
# para_words_group.append(word_crop_) | |
# added.append(word_crop_) | |
# img_word_crops = [p for p in img_word_crops if p not in para_words_group] | |
# ## processing for the line | |
# while para_words_group: | |
# line_words_group = [ | |
# para_words_group[0], | |
# ] | |
# added = [ | |
# para_words_group[0], | |
# ] | |
# para_words_group.remove(para_words_group[0]) | |
# ## determining the line | |
# while added: | |
# word_crop = added.pop() | |
# for i in range(len(para_words_group)): | |
# word_crop_ = para_words_group[i] | |
# if ( | |
# abs(data[word_crop_]["yc"] - data[word_crop]["yc"]) | |
# < data[word_crop]["h"] * alpha1 | |
# ): | |
# if data[word_crop]["xc"] > data[word_crop_]["xc"]: | |
# if (data[word_crop]["x1"] - data[word_crop_]["x2"]) < data[ | |
# word_crop | |
# ]["h"] * alpha2: | |
# line_words_group.append(word_crop_) | |
# added.append(word_crop_) | |
# else: | |
# if (data[word_crop_]["x1"] - data[word_crop]["x2"]) < data[ | |
# word_crop | |
# ]["h"] * alpha2: | |
# line_words_group.append(word_crop_) | |
# added.append(word_crop_) | |
# para_words_group = [ | |
# p for p in para_words_group if p not in line_words_group | |
# ] | |
# xc = [data[word_crop]["xc"] for word_crop in line_words_group] | |
# idxs = np.argsort(xc) | |
# patch_cluster_ = [line_words_group[i] for i in idxs] | |
# line_words_group = patch_cluster_ | |
# x1 = [data[word_crop]["x1"] for word_crop in line_words_group] | |
# x2 = [data[word_crop]["x2"] for word_crop in line_words_group] | |
# y1 = [data[word_crop]["y1"] for word_crop in line_words_group] | |
# y2 = [data[word_crop]["y2"] for word_crop in line_words_group] | |
# txt_line = [data[word_crop]["txt"] for word_crop in line_words_group] | |
# txt = " ".join(txt_line) | |
# x = [x1[0]] | |
# y1_ = [y1[0]] | |
# y2_ = [y2[0]] | |
# l = [len(txt_l) for txt_l in txt_line] | |
# for i in range(1, len(x1)): | |
# x.append((x1[i] + x2[i - 1]) / 2) | |
# y1_.append((y1[i] + y1[i - 1]) / 2) | |
# y2_.append((y2[i] + y2[i - 1]) / 2) | |
# x.append(x2[-1]) | |
# y1_.append(y1[-1]) | |
# y2_.append(y2[-1]) | |
# line_info = { | |
# "x": x, | |
# "y1": y1_, | |
# "y2": y2_, | |
# "l": l, | |
# "txt": txt, | |
# "word_crops": line_words_group, | |
# } | |
# clusters.append(line_info) | |
# y_ = [clusters[i]["y1"][0] for i in range(len(clusters))] | |
# idxs = np.argsort(y_) | |
# clusters_ = [clusters[i] for i in idxs] | |
# txt = [clusters[i]["txt"] for i in idxs] | |
# l = [len(t) for t in txt] | |
# txt = " ".join(txt) | |
# para_info = {"lines": clusters_, "l": l, "txt": txt} | |
# para.append(para_info) | |
# for word_crop in word_crop_collection: | |
# word_crops.remove(word_crop) | |
# return "\n".join([para[i]["txt"] for i in range(len(para))]) | |
def detect_para(recognized_texts): | |
""" | |
Sort words into lines based on horizontal overlap of bounding boxes. | |
Args: | |
recognized_texts (dict): A dictionary with recognized texts as keys and bounding boxes as values. | |
Each bounding box is a list of points [x1, y1, x2, y2]. | |
Returns: | |
list: A list of lists where each sublist contains words sorted by x-coordinate for a single line. | |
""" | |
def calculate_overlap(bbox1, bbox2): | |
"""Calculate the vertical overlap between two bounding boxes.""" | |
# Extract bounding box coordinates | |
x1_1, y1_1, x2_1, y2_1 = bbox1 | |
x1_2, y1_2, x2_2, y2_2 = bbox2 | |
overlap = max(0, min(y2_1, y2_2) - max(y1_1, y1_2)) | |
height = min(y2_1 - y1_1, y2_2 - y1_2) | |
return overlap / height if height > 0 else 0 | |
# Convert recognized_texts dictionary to a list of tuples for processing | |
items = list(recognized_texts.items()) | |
lines = [] | |
while items: | |
current_image, current_data = items.pop(0) | |
current_text, current_bbox = current_data['txt'], current_data['bbox'] | |
current_line = [(current_text, current_bbox)] | |
remaining_items = [] | |
for image, data in items: | |
text, bbox = data['txt'], data['bbox'] | |
if calculate_overlap(current_bbox, bbox) > 0.4: | |
current_line.append((text, bbox)) | |
else: | |
remaining_items.append((image, data)) | |
items = remaining_items | |
lines.append(current_line) | |
# Sort words within each line based on x1 (horizontal position) | |
sorted_lines = [ | |
[text for text, bbox in sorted(line, key=lambda x: x[1][0])] for line in lines | |
] | |
return sorted_lines | |