import json

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from paddleocr import PaddleOCR
from transformers import pipeline
import gradio as gr

# Initialize PaddleOCR (detection/recognition models are downloaded on first run)
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Load the token-classification pipeline once at startup instead of on every
# request. Note: this checkpoint was fine-tuned for SMS spam classification,
# not NER, so its LABEL_0/LABEL_1 token tags are only a coarse heuristic here.
nlp = pipeline("ner", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")

# Predefined fields for extraction
FIELDS = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
          "Mobile Number", "Email", "Address", "Bank Account Details"]

def draw_boxes_on_image(image, data):
    """
    Draw numbered bounding boxes and recognized text on the image.

    Args:
        image (PIL Image): The input image.
        data (list): OCR results, one (bounding_box, (text, confidence)) pair per detected line.

    Returns:
        PIL Image: The image with boxes and labels drawn on it.
    """
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        # Fall back to Pillow's built-in bitmap font if Arial is unavailable.
        font = ImageFont.load_default()

    for item_id, (bounding_box, (text, confidence)) in enumerate(data, start=1):
        box = np.array(bounding_box).astype(int)
        # Draw the quadrilateral as a single closed polyline.
        points = [tuple(p) for p in box] + [tuple(box[0])]
        draw.line(points, fill="green", width=2)
        # Place the label just above the box, clamped so it stays on the canvas.
        text_position = (box[0][0], max(box[0][1] - 20, 0))
        draw.text(text_position, f"{item_id}: {text} ({confidence:.2f})", fill="red", font=font)

    return image
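
# Example usage (a sketch: the nested (box, (text, score)) structure mirrors
# what PaddleOCR returns per detected line; the values and file names are hypothetical):
#
#   sample = [([[10, 10], [180, 10], [180, 40], [10, 40]], ("Folio Number", 0.97))]
#   annotated = draw_boxes_on_image(Image.open("statement.png").convert("RGB"), sample)
#   annotated.save("annotated.png")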

def convert_to_json(results):
    """
    Convert the OCR results into a JSON-serializable list with bounding box IDs.

    Args:
        results (list): OCR results containing bounding box coordinates, text, and confidence.

    Returns:
        list: One dict per detected line, with id, bounding box, text, and confidence.
    """
    json_data = []
    for item_id, (bounding_box, (text, confidence)) in enumerate(results, start=1):
        json_data.append({
            "id": item_id,
            "bounding_box": bounding_box,
            "text": text,
            # float() guards against numpy scalars, which json.dump cannot serialize.
            "confidence": float(confidence)
        })
    return json_data
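
# For reference, each record produced above is shaped like this (values hypothetical):
#
#   {"id": 1,
#    "bounding_box": [[10, 10], [180, 10], [180, 40], [10, 40]],
#    "text": "Folio Number",
#    "confidence": 0.97}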

def extract_field_value_pairs(text):
    """
    Extract field-value pairs from the OCR text using the module-level NLP pipeline.

    Args:
        text (str): The text to be processed.

    Returns:
        dict: A dictionary mapping field names to extracted values.
    """
    ner_results = []
    # Process the text in fixed-size character chunks so each piece stays
    # well within the model's input limit.
    chunk_size = 256
    for i in range(0, len(text), chunk_size):
        chunk = text[i:i + chunk_size]
        ner_results.extend(nlp(chunk))

    field_value_pairs = {}
    current_field = None
    for entity in ner_results:
        word = entity['word']
        # Switch the active field whenever a token contains a known field name.
        for field in FIELDS:
            if field.lower() in word.lower():
                current_field = field
                break
        # Record (overwriting any earlier hit) the latest LABEL_1 token as the value.
        if current_field and entity['entity'] == "LABEL_1":
            field_value_pairs[current_field] = word

    return field_value_pairs
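
# Illustration of the heuristic above (hypothetical tokens): for OCR text like
# "PAN ABCDE1234F", a token containing "PAN" activates that field, and a later
# token tagged LABEL_1 is stored as its value, e.g. {"PAN": "ABCDE1234F"}.
# Crude, but cheap for a tiny model.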

def process_image(image):
    """
    Process the uploaded image: run OCR, annotate the image, and save results to disk.

    Args:
        image (PIL Image): The input image.

    Returns:
        tuple: The annotated image, the path to the OCR results JSON file,
               and the path to the extracted field-value pairs JSON file.
    """
    # Perform OCR on the image (convert to RGB so RGBA/palette uploads also work)
    ocr_results = ocr.ocr(np.array(image.convert("RGB")), cls=True)
    # PaddleOCR returns one result list per page; it can be None when nothing is detected.
    lines = ocr_results[0] if ocr_results and ocr_results[0] else []

    # Draw boxes on a copy of the image
    image_with_boxes = draw_boxes_on_image(image.copy(), lines)

    # Convert OCR results to JSON and save in the working directory
    json_results = convert_to_json(lines)
    json_results_path = 'ocr_results.json'
    with open(json_results_path, "w") as f:
        json.dump(json_results, f, indent=4)

    # Extract field-value pairs from the concatenated OCR text
    text = " ".join(line[1][0] for line in lines)
    field_value_pairs = extract_field_value_pairs(text)
    field_value_pairs_path = 'extracted_fields.json'
    with open(field_value_pairs_path, "w") as f:
        json.dump(field_value_pairs, f, indent=4)

    return image_with_boxes, json_results_path, field_value_pairs_path
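
# Quick local check without the web UI (a sketch; the file path is hypothetical):
#
#   img = Image.open("sample_statement.png")
#   annotated, ocr_json_path, fields_json_path = process_image(img)
#   annotated.show()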

# Define the Gradio interface. live=True re-runs the whole pipeline whenever
# the input changes, which can be slow for large images.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.File(label="Download OCR Results"),
        gr.File(label="Download Extracted Fields")
    ],
    live=True
)
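
# launch() serves the app locally (default http://127.0.0.1:7860); pass
# share=True for a temporary public URL when running on a remote machine.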

if __name__ == "__main__":
    iface.launch()