Spaces:

shreyasvaidya
/

Scene-Text-Translator

Running

App Files Files Community

Scene-Text-Translator / IndicPhotoOCR /utils /helper.py

shreyasvaidya

Upload folder using huggingface_hub

01bb3bb verified 23 days ago

raw

history blame contribute delete

10.2 kB

	import numpy as np

	# def detect_para(bbox_dict):
	# alpha1 = 0.2
	# alpha2 = 0.7
	# beta1 = 0.4
	# data = bbox_dict
	# word_crops = list(data.keys())
	# for i in word_crops:
	# data[i]["x1"], data[i]["y1"], data[i]["x2"], data[i]["y2"] = data[i]["bbox"]
	# data[i]["xc"] = (data[i]["x1"] + data[i]["x2"]) / 2
	# data[i]["yc"] = (data[i]["y1"] + data[i]["y2"]) / 2
	# data[i]["w"] = data[i]["x2"] - data[i]["x1"]
	# data[i]["h"] = data[i]["y2"] - data[i]["y1"]

	# patch_info = {}
	# while word_crops:
	# img_name = word_crops[0].split("_")[0]
	# word_crop_collection = [
	# word_crop for word_crop in word_crops if word_crop.startswith(img_name)
	# ]
	# centroids = {}
	# lines = []
	# img_word_crops = word_crop_collection.copy()
	# para = []
	# while img_word_crops:
	# clusters = []
	# para_words_group = [
	# img_word_crops[0],
	# ]
	# added = [
	# img_word_crops[0],
	# ]
	# img_word_crops.remove(img_word_crops[0])
	# ## determining the paragraph
	# while added:
	# word_crop = added.pop()
	# for i in range(len(img_word_crops)):
	# word_crop_ = img_word_crops[i]
	# if (
	# abs(data[word_crop_]["yc"] - data[word_crop]["yc"])
	# < data[word_crop]["h"] * alpha1
	# ):
	# if data[word_crop]["xc"] > data[word_crop_]["xc"]:
	# if (data[word_crop]["x1"] - data[word_crop_]["x2"]) < data[
	# word_crop
	# ]["h"] * alpha2:
	# para_words_group.append(word_crop_)
	# added.append(word_crop_)
	# else:
	# if (data[word_crop_]["x1"] - data[word_crop]["x2"]) < data[
	# word_crop
	# ]["h"] * alpha2:
	# para_words_group.append(word_crop_)
	# added.append(word_crop_)
	# else:
	# if data[word_crop]["yc"] > data[word_crop_]["yc"]:
	# if (data[word_crop]["y1"] - data[word_crop_]["y2"]) < data[
	# word_crop
	# ]["h"] * beta1 and (
	# (
	# (data[word_crop_]["x1"] < data[word_crop]["x2"])
	# and (data[word_crop_]["x1"] > data[word_crop]["x1"])
	# )
	# or (
	# (data[word_crop_]["x2"] < data[word_crop]["x2"])
	# and (data[word_crop_]["x2"] > data[word_crop]["x1"])
	# )
	# or (
	# (data[word_crop]["x1"] > data[word_crop_]["x1"])
	# and (data[word_crop]["x2"] < data[word_crop_]["x2"])
	# )
	# ):
	# para_words_group.append(word_crop_)
	# added.append(word_crop_)
	# else:
	# if (data[word_crop_]["y1"] - data[word_crop]["y2"]) < data[
	# word_crop
	# ]["h"] * beta1 and (
	# (
	# (data[word_crop_]["x1"] < data[word_crop]["x2"])
	# and (data[word_crop_]["x1"] > data[word_crop]["x1"])
	# )
	# or (
	# (data[word_crop_]["x2"] < data[word_crop]["x2"])
	# and (data[word_crop_]["x2"] > data[word_crop]["x1"])
	# )
	# or (
	# (data[word_crop]["x1"] > data[word_crop_]["x1"])
	# and (data[word_crop]["x2"] < data[word_crop_]["x2"])
	# )
	# ):
	# para_words_group.append(word_crop_)
	# added.append(word_crop_)
	# img_word_crops = [p for p in img_word_crops if p not in para_words_group]
	# ## processing for the line
	# while para_words_group:
	# line_words_group = [
	# para_words_group[0],
	# ]
	# added = [
	# para_words_group[0],
	# ]
	# para_words_group.remove(para_words_group[0])
	# ## determining the line
	# while added:
	# word_crop = added.pop()
	# for i in range(len(para_words_group)):
	# word_crop_ = para_words_group[i]
	# if (
	# abs(data[word_crop_]["yc"] - data[word_crop]["yc"])
	# < data[word_crop]["h"] * alpha1
	# ):
	# if data[word_crop]["xc"] > data[word_crop_]["xc"]:
	# if (data[word_crop]["x1"] - data[word_crop_]["x2"]) < data[
	# word_crop
	# ]["h"] * alpha2:
	# line_words_group.append(word_crop_)
	# added.append(word_crop_)
	# else:
	# if (data[word_crop_]["x1"] - data[word_crop]["x2"]) < data[
	# word_crop
	# ]["h"] * alpha2:
	# line_words_group.append(word_crop_)
	# added.append(word_crop_)
	# para_words_group = [
	# p for p in para_words_group if p not in line_words_group
	# ]
	# xc = [data[word_crop]["xc"] for word_crop in line_words_group]
	# idxs = np.argsort(xc)
	# patch_cluster_ = [line_words_group[i] for i in idxs]
	# line_words_group = patch_cluster_
	# x1 = [data[word_crop]["x1"] for word_crop in line_words_group]
	# x2 = [data[word_crop]["x2"] for word_crop in line_words_group]
	# y1 = [data[word_crop]["y1"] for word_crop in line_words_group]
	# y2 = [data[word_crop]["y2"] for word_crop in line_words_group]
	# txt_line = [data[word_crop]["txt"] for word_crop in line_words_group]
	# txt = " ".join(txt_line)
	# x = [x1[0]]
	# y1_ = [y1[0]]
	# y2_ = [y2[0]]
	# l = [len(txt_l) for txt_l in txt_line]
	# for i in range(1, len(x1)):
	# x.append((x1[i] + x2[i - 1]) / 2)
	# y1_.append((y1[i] + y1[i - 1]) / 2)
	# y2_.append((y2[i] + y2[i - 1]) / 2)
	# x.append(x2[-1])
	# y1_.append(y1[-1])
	# y2_.append(y2[-1])
	# line_info = {
	# "x": x,
	# "y1": y1_,
	# "y2": y2_,
	# "l": l,
	# "txt": txt,
	# "word_crops": line_words_group,
	# }
	# clusters.append(line_info)
	# y_ = [clusters[i]["y1"][0] for i in range(len(clusters))]
	# idxs = np.argsort(y_)
	# clusters_ = [clusters[i] for i in idxs]
	# txt = [clusters[i]["txt"] for i in idxs]
	# l = [len(t) for t in txt]
	# txt = " ".join(txt)
	# para_info = {"lines": clusters_, "l": l, "txt": txt}
	# para.append(para_info)

	# for word_crop in word_crop_collection:
	# word_crops.remove(word_crop)
	# return "\n".join([para[i]["txt"] for i in range(len(para))])


	def detect_para(recognized_texts, overlap_threshold=0.4):
	    """Group recognized words into text lines by vertical overlap.

	    A line is seeded with the first word that has not been assigned yet.
	    Every other unassigned word whose bounding box overlaps the seed's box
	    vertically by more than ``overlap_threshold`` joins that line. Candidates
	    are compared against the seed word only, not against words that were
	    added to the line later.

	    Args:
	        recognized_texts (dict): Maps an arbitrary key to a dict holding
	            ``'txt'`` (the recognized text) and ``'bbox'`` (a 4-element
	            sequence ``[x1, y1, x2, y2]``). The keys themselves are not
	            used.
	        overlap_threshold (float): Minimum vertical overlap, expressed as a
	            fraction of the shorter box's height, that a word must exceed
	            to be placed on the seed's line. Defaults to 0.4.

	    Returns:
	        list: A list of lines, each a list of texts ordered by the ``x1``
	        of their boxes. Lines appear in the order in which their seed word
	        occurs when iterating ``recognized_texts``; they are not sorted by
	        vertical position.
	    """
	    def calculate_overlap(bbox1, bbox2):
	        """Return the vertical overlap as a fraction of the shorter box."""
	        # Only the y-extents matter here; the x-coordinates are ignored.
	        _, top_1, _, bottom_1 = bbox1
	        _, top_2, _, bottom_2 = bbox2

	        overlap = max(0, min(bottom_1, bottom_2) - max(top_1, top_2))
	        height = min(bottom_1 - top_1, bottom_2 - top_2)
	        # A degenerate (zero or negative height) box never overlaps anything.
	        return overlap / height if height > 0 else 0

	    # Work on (text, bbox) pairs; the dictionary keys play no role.
	    pending = [
	        (entry['txt'], entry['bbox']) for entry in recognized_texts.values()
	    ]
	    lines = []

	    while pending:
	        seed, *candidates = pending
	        seed_bbox = seed[1]
	        current_line = [seed]

	        # Words that do not fit this line are kept for the next pass.
	        pending = []
	        for text, bbox in candidates:
	            if calculate_overlap(seed_bbox, bbox) > overlap_threshold:
	                current_line.append((text, bbox))
	            else:
	                pending.append((text, bbox))

	        lines.append(current_line)

	    # Order the words of each line by x1; sorted() is stable, so words that
	    # share the same x1 keep their encounter order.
	    return [
	        [text for text, _ in sorted(line, key=lambda word: word[1][0])]
	        for line in lines
	    ]