# Spaces: Paused  (non-code residue from the page this file was copied from)
import os | |
import pandas as pd | |
import os | |
# Point Google's Application Default Credentials lookup at the service-account
# key file. The path is relative to the process's working directory, and this
# assignment replaces any value already present in the environment.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./titanium-scope-436311-t3-966373f5aa2f.json"
def run_tesseract_on_image(image_path):  # -> tsv output path
    """Run the Tesseract CLI on one image and return the path of its TSV output.

    The output is written to ``/content/<name>.tsv`` where ``<name>`` is the
    image's base name up to (not including) its first ``.``.

    Args:
        image_path: Path of the image file to OCR.

    Returns:
        The path of the TSV file Tesseract was asked to write.

    Raises:
        ValueError: If Tesseract cannot be started or exits with a non-zero
            status.
    """
    import subprocess  # local import so this block does not depend on file-level edits

    error_message = 'Tesseract OCR Error please verify image format PNG,JPG,JPEG'

    print("image_path", image_path)
    # partition() keeps the whole name when it contains no '.'; slicing on
    # find('.') == -1 used to silently drop the last character instead.
    image_name = os.path.basename(image_path).partition('.')[0]
    output_base = f"/content/{image_name}"
    try:
        # An argument list (no shell) keeps paths containing quotes, spaces or
        # shell metacharacters from being interpreted as shell syntax.
        completed = subprocess.run(
            ["tesseract", image_path, output_base, "-l", "eng", "tsv"],
            check=False,
        )
    except OSError as err:
        # e.g. the tesseract executable is missing; callers expect ValueError.
        raise ValueError(error_message) from err
    if completed.returncode != 0:
        raise ValueError(error_message)
    return f"{output_base}.tsv"
def clean_tesseract_output(tsv_output_path):
    """Load a Tesseract TSV file and return its non-blank words with boxes.

    Args:
        tsv_output_path: Path of a tab-separated file that has at least the
            columns ``left``, ``top``, ``width``, ``height`` and ``text``.

    Returns:
        A list of dicts, one per kept row, each holding ``word_text`` (the
        row's text) and ``word_box`` as
        ``[left, top, left + width, top + height]``.
    """
    import csv  # local import so this block does not depend on file-level edits

    print("tsv_output_path", tsv_output_path)
    ocr_df = pd.read_csv(
        tsv_output_path,
        sep='\t',
        # OCR text may contain a bare '"'; it must not be treated as a quote.
        quoting=csv.QUOTE_NONE,
        # Only a truly empty field counts as missing, so recognised words such
        # as "NA" or "null" are kept instead of being parsed as NaN.
        keep_default_na=False,
        na_values=[''],
        # Keep numeric-looking words as strings so the .str accessor works.
        dtype={'text': str},
    )
    # Rows with any missing field (typically the non-word layout rows, whose
    # text field is empty) are discarded, then whitespace-only words.
    ocr_df = ocr_df.dropna()
    ocr_df = ocr_df[ocr_df['text'].str.strip() != '']

    columns = (
        ocr_df['left'],
        ocr_df['top'],
        ocr_df['width'],
        ocr_df['height'],
        ocr_df['text'],
    )
    return [
        {
            'word_text': text,
            'word_box': [left, top, left + width, top + height],
        }
        for left, top, width, height, text in zip(*columns)
    ]
def detect_text(path):
    """Detect words in an image file with Google Cloud Vision text detection.

    Args:
        path: Path of the image file whose bytes are sent to the Vision API.

    Returns:
        A list of dicts, one per text annotation after the first, each holding
        ``word_text`` (the annotation's description) and ``word_box`` as
        ``[x_min, y_min, x_max, y_max]`` over the annotation's bounding-polygon
        vertices.

    Raises:
        Exception: If the API response carries an error message.
    """
    from google.cloud import vision

    print("this is path:", path)
    client = vision.ImageAnnotatorClient()
    with open(path, "rb") as image_file:
        content = image_file.read()
    image = vision.Image(content=content)
    response = client.text_detection(image=image)

    # Fail before touching the annotations: an errored response should not be
    # partially processed and printed first.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    texts = response.text_annotations
    print("Texts:")
    list_of_dict = []
    # The first annotation is skipped; presumably it is the full-text block
    # rather than a single word (see the Vision API docs) -- TODO confirm.
    for text in texts[1:]:
        print(f'\n"{text.description}"')
        vertices_list = [
            [int(vertex.x), int(vertex.y)]
            for vertex in text.bounding_poly.vertices
        ]
        print("vertices_list", vertices_list)
        if not vertices_list:
            # No polygon means no box can be built; skip instead of crashing.
            continue
        xs = [x for x, _ in vertices_list]
        ys = [y for _, y in vertices_list]
        # Min/max over all vertices always yields x_min <= x_max and
        # y_min <= y_max, even for rotated or skewed polygons, and matches the
        # two opposite corners for an axis-aligned rectangle.
        list_of_dict.append(
            {
                "word_text": text.description,
                "word_box": [min(xs), min(ys), max(xs), max(ys)],
            }
        )
    return list_of_dict
def prepare_batch_for_inference(image_paths):
    """Run OCR on each image and assemble the batch dict used for inference.

    Args:
        image_paths: Iterable of image file paths; each one is passed to
            ``detect_text``.

    Returns:
        A dict with three entries:
        ``image_path`` is ``image_paths`` as given,
        ``bboxes`` is one list of ``word_box`` values per image, and
        ``words`` is one list of ``word_text`` values per image.
    """
    # One list of word dicts per image. run_tesseract_on_image followed by
    # clean_tesseract_output yields dicts with the same two keys and was the
    # OCR path previously used here.
    clean_outputs = [detect_text(image_path) for image_path in image_paths]
    print("clean_outputs", clean_outputs)

    word_lists = [
        [word['word_text'] for word in clean_output]
        for clean_output in clean_outputs
    ]
    boxes_lists = [
        [word['word_box'] for word in clean_output]
        for clean_output in clean_outputs
    ]
    return {
        "image_path": image_paths,
        "bboxes": boxes_lists,
        "words": word_lists,
    }