Update app.py
Browse files
app.py
CHANGED
@@ -3,119 +3,130 @@ import numpy as np
|
|
3 |
from PIL import Image, ImageDraw, ImageFont
|
4 |
import json
|
5 |
from paddleocr import PaddleOCR
|
|
|
6 |
import gradio as gr
|
7 |
-
import os
|
8 |
|
9 |
# Initialize PaddleOCR
|
10 |
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
11 |
|
12 |
-
#
|
13 |
-
|
14 |
-
|
15 |
-
image_rgb = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
|
16 |
-
|
17 |
-
# Load the image into PIL for easier drawing
|
18 |
-
pil_image = Image.fromarray(image_rgb)
|
19 |
-
draw = ImageDraw.Draw(pil_image)
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
try:
|
23 |
-
font = ImageFont.truetype("
|
24 |
except IOError:
|
25 |
font = ImageFont.load_default()
|
26 |
|
27 |
-
for item in data:
|
28 |
bounding_box, (text, confidence) = item
|
29 |
-
|
30 |
-
# Ensure bounding_box is a list of lists
|
31 |
-
if not isinstance(bounding_box[0], list):
|
32 |
-
bounding_box = [bounding_box]
|
33 |
-
|
34 |
box = np.array(bounding_box).astype(int)
|
35 |
-
|
36 |
-
# Draw the bounding box
|
37 |
draw.line([tuple(box[0]), tuple(box[1])], fill="green", width=2)
|
38 |
draw.line([tuple(box[1]), tuple(box[2])], fill="green", width=2)
|
39 |
draw.line([tuple(box[2]), tuple(box[3])], fill="green", width=2)
|
40 |
draw.line([tuple(box[3]), tuple(box[0])], fill="green", width=2)
|
41 |
-
|
42 |
-
# Draw the text above the bounding box
|
43 |
text_position = (box[0][0], box[0][1] - 20)
|
44 |
-
draw.text(text_position, f"{text} ({confidence:.2f})", fill="red", font=font)
|
45 |
|
46 |
-
return
|
47 |
|
48 |
-
|
49 |
-
def convert_to_json(results, output_file):
|
50 |
"""
|
51 |
-
Converts the
|
52 |
-
|
53 |
Args:
|
54 |
-
results: The list of results containing bounding box coordinates, text, and confidence.
|
55 |
-
|
|
|
|
|
56 |
"""
|
57 |
json_data = []
|
58 |
-
for result in results:
|
59 |
bounding_box = result[0]
|
60 |
text = result[1][0]
|
61 |
confidence = result[1][1]
|
62 |
-
|
63 |
json_data.append({
|
64 |
-
"
|
|
|
65 |
"text": text,
|
66 |
"confidence": confidence
|
67 |
})
|
|
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
field_value_pairs = {}
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
return field_value_pairs
|
86 |
|
87 |
-
# Function to process the image and generate outputs
|
88 |
def process_image(image):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
ocr_results = ocr.ocr(np.array(image), cls=True)
|
90 |
-
processed_image = draw_boxes_on_image(image, ocr_results[0])
|
91 |
|
92 |
-
#
|
93 |
-
|
94 |
-
convert_to_json(ocr_results[0], json_path)
|
95 |
|
96 |
-
#
|
97 |
-
|
98 |
-
"Mobile Number", "Email", "Address", "Bank Account Details"]
|
99 |
-
field_value_pairs = identify_field_value_pairs(ocr_results[0], fields)
|
100 |
-
field_value_json_path = "field_value_pairs.json"
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
gr.File(label="Download Field-Value Pairs JSON")
|
115 |
-
],
|
116 |
-
title="OCR Web Application",
|
117 |
-
description="Upload an image and get OCR results with bounding boxes and two JSON outputs."
|
118 |
)
|
119 |
|
120 |
if __name__ == "__main__":
|
121 |
-
|
|
|
3 |
from PIL import Image, ImageDraw, ImageFont
|
4 |
import json
|
5 |
from paddleocr import PaddleOCR
|
6 |
+
from transformers import pipeline
|
7 |
import gradio as gr
|
|
|
8 |
|
9 |
# Initialize PaddleOCR once at module load — model setup is expensive, so it is
# shared by every request. use_angle_cls=True enables text-angle classification;
# lang='en' loads the English recognition models.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Predefined fields for extraction — the document field names that
# extract_field_value_pairs searches for when pairing recognized text
# with known fields.
FIELDS = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
          "Mobile Number", "Email", "Address", "Bank Account Details"]
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
def draw_boxes_on_image(image, data):
    """
    Draw numbered bounding boxes and recognized text on the image.

    Args:
        image (PIL.Image.Image): Image to draw on (drawn in place).
        data (list): OCR results; each item is
            (bounding_box, (text, confidence)) where bounding_box is
            four [x, y] corner points.

    Returns:
        PIL.Image.Image: The same image object with boxes and labels drawn.
    """
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        # Arial is not installed (common on Linux/containers) — fall back
        # to PIL's built-in bitmap font.
        font = ImageFont.load_default()

    for item_id, item in enumerate(data, start=1):
        bounding_box, (text, confidence) = item
        box = np.array(bounding_box).astype(int)

        # Outline the quadrilateral by connecting consecutive corners
        # (and closing back to the first one).
        corners = [tuple(box[0]), tuple(box[1]), tuple(box[2]), tuple(box[3])]
        for start, end in zip(corners, corners[1:] + corners[:1]):
            draw.line([start, end], fill="green", width=2)

        # Label above the box. Clamp y to 0 so boxes touching the top edge
        # don't push the label off the canvas (fix: was box[0][1] - 20,
        # which can be negative and clip the text).
        text_position = (box[0][0], max(0, box[0][1] - 20))
        draw.text(text_position, f"{item_id}: {text} ({confidence:.2f})", fill="red", font=font)

    return image
|
44 |
|
45 |
+
def convert_to_json(results):
    """
    Convert OCR results into JSON-serializable records with bounding box IDs.

    Args:
        results (list): OCR results; each entry holds the bounding box at
            index 0 and a (text, confidence) pair at index 1.

    Returns:
        list[dict]: One record per detection with keys "id", "bounding_box",
        "text", and "confidence". IDs are 1-based and follow input order.
    """
    return [
        {
            "id": item_id,
            "bounding_box": entry[0],
            "text": entry[1][0],
            "confidence": entry[1][1],
        }
        for item_id, entry in enumerate(results, start=1)
    ]
|
67 |
|
68 |
+
# Lazily-built, module-level cache so the model is loaded only once
# (the original rebuilt the pipeline — reloading the model — on every call).
_NER_PIPELINE = None


def _get_ner_pipeline():
    """Build the token-classification pipeline once and reuse it."""
    global _NER_PIPELINE
    if _NER_PIPELINE is None:
        # NOTE(review): this checkpoint is a spam-*classification* model,
        # not an NER model — its LABEL_0/LABEL_1 outputs are not entity
        # tags. Flagging rather than silently replacing; consider swapping
        # in a real NER checkpoint (e.g. "dslim/bert-base-NER").
        _NER_PIPELINE = pipeline("ner", model="mrm8488/bert-tiny-finetuned-sms-spam-detection")
    return _NER_PIPELINE


def extract_field_value_pairs(text):
    """
    Extract field-value pairs from the text using a pre-trained NLP model.

    Args:
        text (str): The text to be processed (joined OCR output).

    Returns:
        dict: Mapping of recognized field names (from FIELDS) to the last
        token the model tagged as "LABEL_1" while that field was current.
    """
    nlp = _get_ner_pipeline()

    # Process in fixed-size character chunks to stay under the model's input
    # limit. NOTE(review): a chunk boundary can split a word/entity in two —
    # overlapping or token-aware chunking would be more robust.
    ner_results = []
    chunk_size = 256
    for i in range(0, len(text), chunk_size):
        ner_results.extend(nlp(text[i:i + chunk_size]))

    field_value_pairs = {}
    current_field = None  # sticky: stays set until another field name matches
    for entity in ner_results:
        word = entity['word']
        for field in FIELDS:
            if field.lower() in word.lower():
                current_field = field
                break
        if current_field and entity['entity'] == "LABEL_1":
            field_value_pairs[current_field] = word

    return field_value_pairs
|
97 |
|
|
|
98 |
def process_image(image):
    """
    Process the uploaded image and perform OCR.

    Args:
        image (PIL.Image.Image): The input image.

    Returns:
        tuple: (image with bounding boxes drawn, OCR results as
        JSON-serializable records, field-value pairs extracted from the
        recognized text). When no text is detected the original image,
        an empty list, and an empty dict are returned.
    """
    # Perform OCR on the image.
    ocr_results = ocr.ocr(np.array(image), cls=True)

    # PaddleOCR yields [None] (or an empty list) when nothing is detected;
    # guard so the drawing/JSON helpers don't crash on it.
    if not ocr_results or not ocr_results[0]:
        return image, [], {}

    detections = ocr_results[0]

    # Draw boxes on a copy so the caller's image is left untouched.
    image_with_boxes = draw_boxes_on_image(image.copy(), detections)

    # Convert OCR results to JSON-serializable records.
    json_results = convert_to_json(detections)

    # Join all recognized text and mine it for field-value pairs.
    text = " ".join(result[1][0] for result in detections)
    field_value_pairs = extract_field_value_pairs(text)

    return image_with_boxes, json_results, field_value_pairs
|
122 |
+
|
123 |
+
# Define Gradio interface: one image input -> three outputs
# (annotated image, raw OCR JSON, field-value-pair JSON).
# NOTE(review): live=True re-runs process_image on every input change,
# which triggers a full OCR + NLP pass each time — confirm this is intended.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="pil"), gr.JSON(), gr.JSON()],
    live=True
)

if __name__ == "__main__":
    # Launch the web app only when run as a script (not on import).
    iface.launch()
|