File size: 4,551 Bytes
6a83e83 5e4f057 ad14750 024ddf4 ad14750 024ddf4 ad14750 024ddf4 ad14750 024ddf4 ad14750 024ddf4 ad14750 1939ca9 ad14750 024ddf4 ad14750 024ddf4 ad14750 024ddf4 1939ca9 024ddf4 1939ca9 024ddf4 1939ca9 024ddf4 1939ca9 024ddf4 1939ca9 024ddf4 1939ca9 024ddf4 ad14750 024ddf4 ad14750 024ddf4 ad14750 024ddf4 ad14750 024ddf4 e3ae3eb ac5b07c 1939ca9 024ddf4 e3ae3eb ac5b07c 024ddf4 ac5b07c 024ddf4 ac5b07c 024ddf4 ad14750 5e4f057 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import json
from paddleocr import PaddleOCR
from transformers import pipeline
import gradio as gr
# Shared OCR engine, created once at import time: English model with the
# text-angle classifier enabled.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Names of the form fields that extract_field_value_pairs() looks for.
FIELDS = [
    "Scheme Name",
    "Folio Number",
    "Number of Units",
    "PAN",
    "Signature",
    "Tax Status",
    "Mobile Number",
    "Email",
    "Address",
    "Bank Account Details",
]
def draw_boxes_on_image(image, data):
    """
    Draw bounding boxes and text labels on the image.

    The image is modified in place and the same object is returned.

    Args:
        image (PIL Image): The input image.
        data (list | None): OCR results. Each item is
            ``[bounding_box, (text, confidence)]`` where ``bounding_box`` is a
            sequence of (x, y) corner points. ``None`` or an empty list means
            there is nothing to draw.
    Returns:
        PIL Image: The image with drawn boxes.
    """
    # The OCR step can report "no text found" as None; return the image
    # untouched instead of failing while iterating.
    if data is None or len(data) == 0:
        return image

    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        # Arial is not available on this machine; use PIL's built-in font.
        font = ImageFont.load_default()

    for item_id, (bounding_box, (text, confidence)) in enumerate(data, start=1):
        # Truncate coordinates to integers and convert to plain Python tuples.
        corners = [tuple(point) for point in np.array(bounding_box).astype(int).tolist()]

        # Connect each corner to the next one, wrapping around to close the box.
        for start, end in zip(corners, corners[1:] + corners[:1]):
            draw.line([start, end], fill="green", width=2)

        # Place the label just above the first corner, clamped so that boxes
        # touching the top edge still get a visible label.
        text_position = (corners[0][0], max(corners[0][1] - 20, 0))
        draw.text(text_position, f"{item_id}: {text} ({confidence:.2f})", fill="red", font=font)
    return image
def convert_to_json(results):
    """
    Convert OCR results into a JSON-serialisable list with bounding box IDs.

    Args:
        results (list | None): OCR results; each item is
            ``[bounding_box, (text, confidence)]``. ``None`` (no text
            detected) is treated like an empty list.
    Returns:
        list: One dict per detection with the keys ``id`` (1-based position
        in ``results``), ``bounding_box``, ``text`` and ``confidence``.
    """
    # A missing result set simply produces no entries.
    if results is None:
        return []

    return [
        {
            "id": item_id,
            "bounding_box": result[0],
            "text": result[1][0],
            "confidence": result[1][1],
        }
        for item_id, result in enumerate(results, start=1)
    ]
def extract_field_value_pairs(text):
"""
Extract field-value pairs from the text using a pre-trained NLP model.
Args:
text (str): The text to be processed.
Returns:
dict: A dictionary with field-value pairs.
"""
nlp = pipeline("ner", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
ner_results = []
chunk_size = 256
for i in range(0, len(text), chunk_size):
chunk = text[i:i+chunk_size]
ner_results.extend(nlp(chunk))
field_value_pairs = {}
current_field = None
for entity in ner_results:
word = entity['word']
for field in FIELDS:
if field.lower() in word.lower():
current_field = field
break
if current_field and entity['entity'] == "LABEL_1":
field_value_pairs[current_field] = word
return field_value_pairs
def process_image(image):
    """
    Process the uploaded image: run OCR, annotate it and export the results.

    Two files are written to the current working directory on every call:
    ``ocr_results.json`` (all detections) and ``extracted_fields.json``
    (field-value pairs).

    Args:
        image (PIL Image | None): The input image. ``None`` (no image
            supplied) produces empty outputs.
    Returns:
        tuple: The image with bounding boxes, the path of the OCR results
        JSON file, and the path of the field-value pairs JSON file. All three
        are ``None`` when no image was supplied.
    """
    # With a live interface the callback can fire without an image (e.g. the
    # input was cleared); return empty outputs instead of failing in OCR.
    if image is None:
        return None, None, None

    # Perform OCR on the image
    ocr_results = ocr.ocr(np.array(image), cls=True)
    # The first entry holds the detections for this image; it can be None
    # when no text was found, so normalise that to an empty list.
    detections = (ocr_results[0] if ocr_results else None) or []

    # Draw boxes on a copy so the caller's image is left untouched
    image_with_boxes = draw_boxes_on_image(image.copy(), detections)

    # Convert OCR results to JSON
    json_results = convert_to_json(detections)
    json_results_path = 'ocr_results.json'  # Save in the root directory
    with open(json_results_path, "w") as f:
        json.dump(json_results, f, indent=4)

    # Extract field-value pairs from the recognised text
    text = " ".join(detection[1][0] for detection in detections)
    field_value_pairs = extract_field_value_pairs(text)
    field_value_pairs_path = 'extracted_fields.json'  # Save in the root directory
    with open(field_value_pairs_path, "w") as f:
        json.dump(field_value_pairs, f, indent=4)

    return image_with_boxes, json_results_path, field_value_pairs_path
# Gradio UI: one image upload in; the annotated image and two downloadable
# JSON files out. Components are created in the same order as they are wired.
_uploaded_image = gr.Image(type="pil")
_result_components = [
    gr.Image(type="pil"),
    gr.File(label="Download OCR Results"),
    gr.File(label="Download Extracted Fields"),
]

# live=True re-runs process_image whenever the input changes.
iface = gr.Interface(
    fn=process_image,
    inputs=_uploaded_image,
    outputs=_result_components,
    live=True,
)

# Start the web app only when executed as a script.
if __name__ == "__main__":
    iface.launch()
|