|
import cv2 |
|
import numpy as np |
|
from PIL import Image, ImageDraw, ImageFont |
|
import json |
|
from paddleocr import PaddleOCR |
|
import gradio as gr |
|
import os |
|
|
|
|
|
# Shared OCR engine: constructed once when the module is imported and then
# used by process_image for every call.
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
)
|
|
|
|
|
def draw_boxes_on_image(image, data):
    """Draw OCR bounding boxes and their recognised text on a copy of an image.

    Args:
        image: Source picture as a PIL image or an array-like of pixels. It is
            treated as already being in RGB channel order; ``process_image``
            in this file passes the uploaded image through unchanged.
        data: Iterable of ``(bounding_box, (text, confidence))`` detections,
            where ``bounding_box`` holds four ``(x, y)`` corner points.
            ``None`` or an empty iterable yields an un-annotated copy.

    Returns:
        A new ``PIL.Image.Image`` in RGB mode with a green outline around each
        detection and a red ``"<text> (<confidence>)"`` label above it.
    """
    # No BGR<->RGB swap: swapping the channels of an RGB input exchanges red
    # and blue in the returned picture. ``convert("RGB")`` additionally lets
    # grayscale or RGBA inputs accept the coloured annotations.
    # NOTE(review): assumes callers supply RGB (Gradio's default) -- confirm if
    # this is ever fed OpenCV-style BGR arrays.
    pil_image = Image.fromarray(np.asarray(image)).convert("RGB")
    draw = ImageDraw.Draw(pil_image)

    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except IOError:
        # Font file not available on this machine: fall back to PIL's default.
        font = ImageFont.load_default()

    # ``data or []`` tolerates a missing detection list (nothing to draw).
    for bounding_box, (text, confidence) in data or []:
        box = np.array(bounding_box).astype(int)
        corners = [tuple(int(value) for value in point) for point in box[:4]]

        # Closed outline: corner 0 -> 1 -> 2 -> 3 -> back to corner 0.
        draw.line(corners + corners[:1], fill="green", width=2)

        # Label sits 20px above the first corner, clamped so that boxes
        # touching the top edge still get a visible label.
        text_x, text_y = corners[0]
        text_position = (text_x, max(text_y - 20, 0))
        draw.text(text_position, f"{text} ({confidence:.2f})", fill="red", font=font)

    return pil_image
|
|
|
|
|
def save_results_to_json(ocr_results):
    """Convert nested OCR output into a list of JSON-serialisable dicts.

    Despite the name, nothing is written to disk here; the function only
    builds the structure that a caller can pass to ``json.dump``.

    Args:
        ocr_results: Iterable of pages, each page being an iterable of
            detections shaped ``[bounding_box, (text, confidence)]``.
            ``None`` (for the whole argument or for a single page) and empty
            pages are skipped.

    Returns:
        A list with one dict per detection, holding ``"bounding_box"`` (list
        of points, every coordinate cast to ``float``), ``"text"`` and
        ``"confidence"`` (cast to ``float``).
    """
    results = []

    for line in ocr_results or []:
        if not line:
            # A page without detections contributes nothing.
            continue
        for word_info in line:
            bounding_box = word_info[0]
            text, confidence = word_info[1]
            results.append({
                "bounding_box": [[float(value) for value in point] for point in bounding_box],
                "text": text,
                # Plain float so NumPy scalar scores stay JSON serialisable.
                "confidence": float(confidence),
            })

    return results
|
|
|
|
|
def identify_field_value_pairs(ocr_results, fields):
    """Pair each requested field label with the text detected right after it.

    Args:
        ocr_results: Iterable of pages, each page being a sequence of
            detections shaped ``[bounding_box, (text, confidence)]``.
            ``None`` (for the whole argument or for a single page) and empty
            pages are skipped.
        fields: Field labels to look for. A label matches a detection when it
            occurs in the detected text as a case-insensitive substring.

    Returns:
        A dict mapping each matched label (spelled as in ``fields``) to the
        text of the detection that immediately follows the matching one on
        the same page. A label found in the last detection of a page gets no
        entry; when a label matches more than once, the last match wins.
    """
    field_value_pairs = {}
    # Lower-case each label once instead of once per detected word.
    lowered_fields = [(field, field.lower()) for field in fields]

    for line in ocr_results or []:
        if not line:
            continue
        # enumerate() gives the true position of the detection; searching the
        # page with list.index() would return the first *equal* detection and
        # so pick the wrong neighbour for repeated labels.
        for position, word_info in enumerate(line):
            text, _ = word_info[1]
            lowered_text = text.lower()
            for field, needle in lowered_fields:
                if needle not in lowered_text:
                    continue
                value_index = position + 1
                if value_index < len(line):
                    field_value_pairs[field] = line[value_index][1][0]
                # At most one label is attributed to a detection.
                break

    return field_value_pairs
|
|
|
|
|
def process_image(image):
    """Run OCR on an image and produce an annotated image plus two JSON files.

    Args:
        image: The uploaded picture, converted with ``np.array`` before being
            handed to the OCR engine.

    Returns:
        A tuple ``(processed_image, json_path, field_value_json_path)``: the
        image annotated by ``draw_boxes_on_image``, the path of the JSON file
        listing every detection, and the path of the JSON file holding the
        extracted field/value pairs.
    """
    ocr_results = ocr.ocr(np.array(image), cls=True)

    # The first entry of the result holds the detections for this image.
    # NOTE(review): a picture without any text appears to come back as None
    # in some PaddleOCR versions -- treated here as "no detections"; confirm.
    first_page = ocr_results[0] if ocr_results else None
    detections = first_page if first_page else []

    # draw_boxes_on_image consumes a flat list of detections, whereas the two
    # helpers below iterate pages first and detections second, so they need
    # the detections wrapped in a one-page list rather than the flat list.
    pages = [detections]

    processed_image = draw_boxes_on_image(image, detections)

    # NOTE(review): both output files use fixed names in the working
    # directory and are overwritten on every call.
    results_json = save_results_to_json(pages)
    json_path = "ocr_results.json"
    with open(json_path, 'w', encoding="utf-8") as json_file:
        json.dump(results_json, json_file, indent=4)

    fields = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
              "Mobile Number", "Email", "Address", "Bank Account Details"]
    field_value_pairs = identify_field_value_pairs(pages, fields)
    field_value_json_path = "field_value_pairs.json"
    with open(field_value_json_path, 'w', encoding="utf-8") as json_file:
        json.dump(field_value_pairs, json_file, indent=4)

    return processed_image, json_path, field_value_json_path
|
|
|
|
|
# Web UI definition: one image goes in; the annotated image and the two JSON
# files returned by process_image come out.
interface = gr.Interface(
    title="OCR Web Application",
    description="Upload an image and get OCR results with bounding boxes and two JSON outputs.",
    fn=process_image,
    inputs="image",
    outputs=[
        "image",
        gr.File(label="OCR Results JSON"),
        gr.File(label="Field-Value Pairs JSON"),
    ],
)


if __name__ == "__main__":
    # Launch the app only when run as a script, not when imported.
    interface.launch()
|
|