import os

import pandas as pd

# Path to the Google Cloud service-account key used by the Vision API client.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./titanium-scope-436311-t3-966373f5aa2f.json"


def run_tesseract_on_image(image_path):  # -> tsv output path
    """Run Tesseract OCR on an image and return the path of the TSV output."""
    print("image_path", image_path)
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    error_code = os.system(
        f'tesseract "{image_path}" "/content/{image_name}" -l eng tsv'
    )
    if not error_code:
        return f"/content/{image_name}.tsv"
    raise ValueError("Tesseract OCR error: please verify the image format (PNG, JPG, JPEG)")


def clean_tesseract_output(tsv_output_path):
    """Parse a Tesseract TSV file into a list of {'word_text', 'word_box'} dicts."""
    print("tsv_output_path", tsv_output_path)
    ocr_df = pd.read_csv(tsv_output_path, sep="\t")
    ocr_df = ocr_df.dropna()
    ocr_df = ocr_df.drop(ocr_df[ocr_df.text.str.strip() == ""].index)
    words = []
    for _, row in ocr_df.iterrows():
        # Convert (left, top, width, height) into an (x0, y0, x1, y1) box.
        origin_box = [
            row["left"],
            row["top"],
            row["left"] + row["width"],
            row["top"] + row["height"],
        ]
        words.append({"word_text": row["text"], "word_box": origin_box})
    return words


def detect_text(path):
    """Detect text in the file with the Google Cloud Vision API and return
    a list of {'word_text', 'word_box'} dicts."""
    from google.cloud import vision

    print("this is path:", path)
    client = vision.ImageAnnotatorClient()

    with open(path, "rb") as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations

    print("Texts:")
    list_of_dict = []
    # The first annotation is the full-page text, so skip it and keep the word-level results.
    for text in texts[1:]:
        print(f'\n"{text.description}"')
        vertices_list = [[int(vertex.x), int(vertex.y)] for vertex in text.bounding_poly.vertices]
        print("vertices_list", vertices_list)

        # Sort the four vertices by x + y: the top-left corner has the smallest
        # sum and the bottom-right corner the largest.
        sorted_coords = sorted(vertices_list, key=lambda coord: coord[0] + coord[1])
        top_left = sorted_coords[0]
        bottom_right = sorted_coords[-1]

        list_of_dict.append(
            {
                "word_text": text.description,
                "word_box": [top_left[0], top_left[1], bottom_right[0], bottom_right[1]],
            }
        )

    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    return list_of_dict


def prepare_batch_for_inference(image_paths):
    """Build the inference batch (image paths, bounding boxes, words) for a list of images.

    OCR was previously done with run_tesseract_on_image / clean_tesseract_output
    (Tesseract TSV output); detect_text (Google Cloud Vision) is used instead.
    """
    clean_outputs = [detect_text(image_path) for image_path in image_paths]
    print("clean_outputs", clean_outputs)

    word_lists = [[word["word_text"] for word in clean_output] for clean_output in clean_outputs]
    boxes_lists = [[word["word_box"] for word in clean_output] for clean_output in clean_outputs]

    inference_batch = {
        "image_path": image_paths,
        "bboxes": boxes_lists,
        "words": word_lists,
    }
    return inference_batch
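

# --- Usage sketch (not part of the original module) ---
# A minimal example of how prepare_batch_for_inference might be called. The image
# path "sample_invoice.png" is hypothetical, and running it requires valid Google
# Cloud Vision credentials; the resulting dict is what a downstream inference step
# would consume.
if __name__ == "__main__":
    batch = prepare_batch_for_inference(["sample_invoice.png"])
    print(batch["words"][0][:10])   # first few OCR'd words of the first image
    print(batch["bboxes"][0][:10])  # their corresponding [x0, y0, x1, y1] boxes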