import json

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from paddleocr import PaddleOCR
from transformers import pipeline
import gradio as gr

# Initialize PaddleOCR (detection/recognition models are downloaded on first run)
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Load the token-classification pipeline once at startup instead of on every
# request. Note: this checkpoint was fine-tuned for SMS spam classification,
# not NER, so its LABEL_0/LABEL_1 token tags are only a coarse heuristic here.
nlp = pipeline("ner", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")

# Predefined fields for extraction
FIELDS = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
          "Mobile Number", "Email", "Address", "Bank Account Details"]

def draw_boxes_on_image(image, data):
    """
    Draw numbered bounding boxes and recognized text on the image.

    Args:
        image (PIL Image): The input image.
        data (list): OCR results, one (bounding_box, (text, confidence)) pair per detected line.

    Returns:
        PIL Image: The image with boxes and labels drawn on it.
    """
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        # Fall back to Pillow's built-in bitmap font if Arial is unavailable.
        font = ImageFont.load_default()

    for item_id, (bounding_box, (text, confidence)) in enumerate(data, start=1):
        box = np.array(bounding_box).astype(int)
        # Draw the quadrilateral as a single closed polyline.
        points = [tuple(p) for p in box] + [tuple(box[0])]
        draw.line(points, fill="green", width=2)
        # Place the label just above the box, clamped so it stays on the canvas.
        text_position = (box[0][0], max(box[0][1] - 20, 0))
        draw.text(text_position, f"{item_id}: {text} ({confidence:.2f})", fill="red", font=font)

    return image
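
# Example usage (a sketch: the nested (box, (text, score)) structure mirrors
# what PaddleOCR returns per detected line; the values and file names are hypothetical):
#
#   sample = [([[10, 10], [180, 10], [180, 40], [10, 40]], ("Folio Number", 0.97))]
#   annotated = draw_boxes_on_image(Image.open("statement.png").convert("RGB"), sample)
#   annotated.save("annotated.png")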

def convert_to_json(results):
    """
    Convert the OCR results into a JSON-serializable list with bounding box IDs.

    Args:
        results (list): OCR results containing bounding box coordinates, text, and confidence.

    Returns:
        list: One dict per detected line, with id, bounding box, text, and confidence.
    """
    json_data = []
    for item_id, (bounding_box, (text, confidence)) in enumerate(results, start=1):
        json_data.append({
            "id": item_id,
            "bounding_box": bounding_box,
            "text": text,
            # float() guards against numpy scalars, which json.dump cannot serialize.
            "confidence": float(confidence)
        })
    return json_data
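
# For reference, each record produced above is shaped like this (values hypothetical):
#
#   {"id": 1,
#    "bounding_box": [[10, 10], [180, 10], [180, 40], [10, 40]],
#    "text": "Folio Number",
#    "confidence": 0.97}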

def extract_field_value_pairs(text):
    """
    Extract field-value pairs from the OCR text using the module-level NLP pipeline.

    Args:
        text (str): The text to be processed.

    Returns:
        dict: A dictionary mapping field names to extracted values.
    """
    ner_results = []
    # Process the text in fixed-size character chunks so each piece stays
    # well within the model's input limit.
    chunk_size = 256
    for i in range(0, len(text), chunk_size):
        chunk = text[i:i + chunk_size]
        ner_results.extend(nlp(chunk))

    field_value_pairs = {}
    current_field = None
    for entity in ner_results:
        word = entity['word']
        # Switch the active field whenever a token contains a known field name.
        for field in FIELDS:
            if field.lower() in word.lower():
                current_field = field
                break
        # Record (overwriting any earlier hit) the latest LABEL_1 token as the value.
        if current_field and entity['entity'] == "LABEL_1":
            field_value_pairs[current_field] = word

    return field_value_pairs
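
# Illustration of the heuristic above (hypothetical tokens): for OCR text like
# "PAN ABCDE1234F", a token containing "PAN" activates that field, and a later
# token tagged LABEL_1 is stored as its value, e.g. {"PAN": "ABCDE1234F"}.
# Crude, but cheap for a tiny model.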

def process_image(image):
    """
    Process the uploaded image: run OCR, annotate the image, and save results to disk.

    Args:
        image (PIL Image): The input image.

    Returns:
        tuple: The annotated image, the path to the OCR results JSON file,
               and the path to the extracted field-value pairs JSON file.
    """
    # Perform OCR on the image (convert to RGB so RGBA/palette uploads also work)
    ocr_results = ocr.ocr(np.array(image.convert("RGB")), cls=True)
    # PaddleOCR returns one result list per page; it can be None when nothing is detected.
    lines = ocr_results[0] if ocr_results and ocr_results[0] else []

    # Draw boxes on a copy of the image
    image_with_boxes = draw_boxes_on_image(image.copy(), lines)

    # Convert OCR results to JSON and save in the working directory
    json_results = convert_to_json(lines)
    json_results_path = 'ocr_results.json'
    with open(json_results_path, "w") as f:
        json.dump(json_results, f, indent=4)

    # Extract field-value pairs from the concatenated OCR text
    text = " ".join(line[1][0] for line in lines)
    field_value_pairs = extract_field_value_pairs(text)
    field_value_pairs_path = 'extracted_fields.json'
    with open(field_value_pairs_path, "w") as f:
        json.dump(field_value_pairs, f, indent=4)

    return image_with_boxes, json_results_path, field_value_pairs_path
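
# Quick local check without the web UI (a sketch; the file path is hypothetical):
#
#   img = Image.open("sample_statement.png")
#   annotated, ocr_json_path, fields_json_path = process_image(img)
#   annotated.show()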

# Define the Gradio interface. live=True re-runs the whole pipeline whenever
# the input changes, which can be slow for large images.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.File(label="Download OCR Results"),
        gr.File(label="Download Extracted Fields")
    ],
    live=True
)
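
# launch() serves the app locally (default http://127.0.0.1:7860); pass
# share=True for a temporary public URL when running on a remote machine.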

if __name__ == "__main__":
    iface.launch()