import json
import os

import cv2
import gradio as gr
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw, ImageFont

# Initialize PaddleOCR once at import time so every request reuses the engine.
ocr = PaddleOCR(use_angle_cls=True, lang='en')


def draw_boxes_on_image(image, data, *, is_bgr=True):
    """Return a PIL copy of *image* with OCR boxes and labels drawn on it.

    Args:
        image: Anything ``np.array`` can turn into an image array.
        data: Iterable of ``(bounding_box, (text, confidence))`` detections.
            ``None`` or an empty iterable yields an unannotated copy.
        is_bgr: When true (the default, matching the historical behaviour)
            the input is treated as OpenCV-style BGR and converted to RGB
            before drawing. Pass ``False`` for images that are already RGB.

    Returns:
        A ``PIL.Image.Image`` with a green outline around every detection
        and a red ``"text (confidence)"`` label above its first corner.
    """
    # np.array copies, so drawing never mutates the caller's image.
    pixels = np.array(image)
    if is_bgr:
        pixels = cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)

    # PIL is used for drawing because it handles TrueType text easily.
    pil_image = Image.fromarray(pixels)
    draw = ImageDraw.Draw(pil_image)

    # Prefer DejaVuSans; fall back to PIL's built-in font when it is missing.
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    for bounding_box, (text, confidence) in data or []:
        # reshape(-1, 2) accepts both nested [[x, y], ...] corners and a flat
        # [x1, y1, x2, y2, ...] sequence.
        corners = np.array(bounding_box).astype(int).reshape(-1, 2)
        points = [(int(x), int(y)) for x, y in corners]

        # Outline the polygon edge by edge, closing it back to the first corner.
        for start, end in zip(points, points[1:] + points[:1]):
            draw.line([start, end], fill="green", width=2)

        # Put the label above the first corner, clamped so it stays on-canvas.
        label_x, label_y = points[0]
        draw.text(
            (label_x, max(label_y - 20, 0)),
            f"{text} ({confidence:.2f})",
            fill="red",
            font=font,
        )

    return pil_image


def convert_to_json(results, output_file):
    """Write OCR detections to *output_file* as an indented JSON array.

    Args:
        results: Iterable of ``(bounding_box, (text, confidence))`` entries;
            ``None`` is treated as "no detections" and produces ``[]``.
        output_file: Path of the JSON file to create or overwrite.
    """
    json_data = [
        {
            "bounding_box": [list(map(float, coord)) for coord in bounding_box],
            "text": text,
            # float() keeps NumPy scalar confidences JSON-serialisable.
            "confidence": float(confidence),
        }
        for bounding_box, (text, confidence) in results or []
    ]

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)


def identify_field_value_pairs(ocr_results, fields):
    """Map each field name to the text of the detection that follows it.

    Args:
        ocr_results: List of pages, each page being a list of
            ``(bounding_box, (text, confidence))`` detections. Empty or
            ``None`` pages are skipped.
        fields: Field labels to look for (case-insensitive substring match).

    Returns:
        Dict of ``field -> value text``. The value is assumed to be the
        detection immediately after the one containing the field label; a
        later match for the same field overwrites an earlier one.
    """
    field_value_pairs = {}
    for line in ocr_results or []:
        if not line:
            continue
        # enumerate gives the real position of this detection; list.index
        # would return the first *equal* detection and is O(n) per lookup.
        for position, word_info in enumerate(line):
            lowered_text = word_info[1][0].lower()
            for field in fields:
                if field.lower() in lowered_text:
                    value_position = position + 1
                    if value_position < len(line):
                        field_value_pairs[field] = line[value_position][1][0]
                    # Only the first matching field is used per detection.
                    break
    return field_value_pairs


def process_image(image):
    """Run OCR on *image* and produce the annotated image plus two JSON files.

    Args:
        image: The uploaded image as delivered by the Gradio ``"image"`` input.

    Returns:
        Tuple ``(annotated PIL image, OCR JSON path, field/value JSON path)``.
        Both JSON files are written to the current working directory and are
        overwritten on every call.
    """
    ocr_results = ocr.ocr(np.array(image), cls=True)

    # Only the first page is used. Some PaddleOCR versions report a page with
    # no detected text as None, so normalise that to an empty list.
    detections = (ocr_results[0] if ocr_results else None) or []

    # Gradio's image input supplies RGB arrays, so skip the BGR->RGB swap that
    # would otherwise exchange the red and blue channels of the preview.
    processed_image = draw_boxes_on_image(image, detections, is_bgr=False)

    # Save OCR results to JSON
    json_path = "ocr_results.json"
    convert_to_json(detections, json_path)

    # Identify field-value pairs
    fields = [
        "Scheme Name",
        "Folio Number",
        "Number of Units",
        "PAN",
        "Signature",
        "Tax Status",
        "Mobile Number",
        "Email",
        "Address",
        "Bank Account Details",
    ]
    # identify_field_value_pairs expects a list of pages, so wrap the page.
    field_value_pairs = identify_field_value_pairs([detections], fields)

    field_value_json_path = "field_value_pairs.json"
    with open(field_value_json_path, 'w', encoding="utf-8") as json_file:
        json.dump(field_value_pairs, json_file, indent=4)

    return processed_image, json_path, field_value_json_path


# Gradio Interface
interface = gr.Interface(
    fn=process_image,
    inputs="image",
    outputs=[
        "image",
        gr.File(label="Download OCR Results JSON"),
        gr.File(label="Download Field-Value Pairs JSON"),
    ],
    title="OCR Web Application",
    description="Upload an image and get OCR results with bounding boxes and two JSON outputs.",
)

if __name__ == "__main__":
    interface.launch()