Prasada committed on
Commit
ad14750
1 Parent(s): 103f0fb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from PIL import Image, ImageDraw, ImageFont
4
+ import json
5
+ from paddleocr import PaddleOCR
6
+ import gradio as gr
7
+ import os
8
+
9
+ # Initialize PaddleOCR
10
# Created once at import time and shared by every process_image() call.
# use_angle_cls=True goes together with the cls=True passed to ocr.ocr() there.
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
)
11
+
12
+ # Function to draw bounding boxes on the image
13
def draw_boxes_on_image(image, data):
    """Draw OCR bounding boxes and their recognised text onto a copy of an image.

    Args:
        image: The source picture, either a PIL image or an array-like of
            pixels. The pixels are expected to be in RGB channel order
            (the order PIL uses, and the one the Gradio image input in this
            file is expected to deliver).
        data: Iterable of ``[bounding_box, (text, confidence)]`` entries,
            where ``bounding_box`` is a sequence of ``[x, y]`` corner points.
            ``None`` is treated as "nothing detected".

    Returns:
        A new RGB ``PIL.Image.Image`` with a green outline around every box
        and a red ``"text (confidence)"`` label above its first corner. The
        input image is not modified.
    """
    # No BGR<->RGB conversion here: the input is already RGB, and swapping
    # would exchange the red and blue channels of the returned picture.
    # convert("RGB") also normalises grayscale/RGBA input and yields a copy.
    if isinstance(image, Image.Image):
        pil_image = image.convert("RGB")
    else:
        pil_image = Image.fromarray(np.asarray(image)).convert("RGB")
    draw = ImageDraw.Draw(pil_image)

    # Prefer a scalable font; fall back to PIL's built-in font when the
    # DejaVuSans file cannot be found.
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    if data is None:
        data = []

    for item in data:
        bounding_box, (text, confidence) = item

        # Pixel coordinates must be integers; int() truncates like astype(int).
        corners = [(int(x), int(y)) for x, y in bounding_box]

        # Repeat the first corner at the end so the outline is closed.
        draw.line(corners + corners[:1], fill="green", width=2)

        # Put the label 20px above the first corner, clamped to the canvas so
        # boxes touching the top edge still get a visible label.
        label_x, label_y = corners[0]
        draw.text(
            (label_x, max(label_y - 20, 0)),
            f"{text} ({confidence:.2f})",
            fill="red",
            font=font,
        )

    return pil_image
44
+
45
+ # Function to save OCR results to JSON
46
def save_results_to_json(ocr_results):
    """Flatten OCR output into a list of JSON-serialisable dictionaries.

    Despite its name, this function writes nothing to disk; the caller dumps
    the returned list.

    Args:
        ocr_results: Iterable of pages, where each page is an iterable of
            ``[bounding_box, (text, confidence)]`` entries and
            ``bounding_box`` is a sequence of ``[x, y]`` points. A page that
            is ``None`` or empty is skipped, and ``None`` for the whole
            argument is treated as no pages.

    Returns:
        A list with one ``{"bounding_box", "text", "confidence"}`` dict per
        entry, in input order. Coordinates and confidence are plain floats.
    """
    results = []

    for page in ocr_results or []:
        # A page with no detections may be None rather than an empty list.
        for entry in page or []:
            bounding_box = entry[0]
            text, confidence = entry[1]
            results.append({
                "bounding_box": [[float(value) for value in point] for point in bounding_box],
                "text": text,
                # Coerce to a built-in float so NumPy scalars stay serialisable.
                "confidence": float(confidence),
            })

    return results
60
+
61
+ # Function to identify 'field', 'value' pairs
62
def identify_field_value_pairs(ocr_results, fields):
    """Pair known field labels with the text block that follows them.

    Matching is a case-insensitive substring test of each field name against
    each recognised text block. The value is assumed to be the very next
    entry on the same page, which is a heuristic and not a layout analysis.

    Args:
        ocr_results: Iterable of pages, where each page is a sequence of
            ``[bounding_box, (text, confidence)]`` entries. Pages that are
            ``None`` or empty are skipped.
        fields: Iterable of field labels to look for.

    Returns:
        A dict mapping each matched field label (as given in ``fields``) to
        the text of the following entry. A label that is the last entry of
        its page yields no value. If a label occurs several times, the last
        occurrence that has a following entry wins. Each text block is
        attributed only to the first field in ``fields`` that it matches.
    """
    # Lower-case every field once instead of once per text block.
    lowered_fields = [(field, field.lower()) for field in fields]

    field_value_pairs = {}
    for page in ocr_results or []:
        if not page:
            continue
        # enumerate() gives the true position; list.index() would return the
        # first *equal* entry and mis-pair repeated labels.
        for position, entry in enumerate(page):
            text = entry[1][0].lower()
            for field, needle in lowered_fields:
                if needle not in text:
                    continue
                # Assuming the value comes immediately after the field.
                value_index = position + 1
                if value_index < len(page):
                    field_value_pairs[field] = page[value_index][1][0]
                break
    return field_value_pairs
75
+
76
+ # Function to process the image and generate outputs
77
def process_image(image):
    """Run OCR on an uploaded image and build the three interface outputs.

    Args:
        image: The uploaded picture; it is converted with ``np.array`` before
            being handed to the OCR engine.

    Returns:
        A tuple ``(annotated_image, ocr_json_path, field_value_json_path)``:
        the image with boxes drawn on it, the path of the JSON file holding
        every detection, and the path of the JSON file holding the
        field/value pairs. Both files are written to the current working
        directory under fixed names and are overwritten on every call.
    """
    ocr_results = ocr.ocr(np.array(image), cls=True)

    # Only one image is submitted, so only the first result entry is used.
    # That entry may be None when nothing was recognised; normalise it to a
    # list so the steps below never iterate over None.
    detections = ocr_results[0] if ocr_results and ocr_results[0] else []
    processed_image = draw_boxes_on_image(image, detections)

    # The two JSON helpers iterate pages and then entries, so the detections
    # are wrapped in a single-page list. Passing them bare made the helpers
    # treat corner points as entries and raise.
    pages = [detections]

    # Save OCR results to JSON
    results_json = save_results_to_json(pages)
    json_path = "ocr_results.json"
    with open(json_path, 'w') as json_file:
        json.dump(results_json, json_file, indent=4)

    # Identify field-value pairs
    fields = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
              "Mobile Number", "Email", "Address", "Bank Account Details"]
    field_value_pairs = identify_field_value_pairs(pages, fields)
    field_value_json_path = "field_value_pairs.json"
    with open(field_value_json_path, 'w') as json_file:
        json.dump(field_value_pairs, json_file, indent=4)

    return processed_image, json_path, field_value_json_path
96
+
97
+ # Gradio Interface
98
# One image goes in; the annotated image and two downloadable JSON files
# come out, in the same order as the tuple returned by process_image.
interface = gr.Interface(
    title="OCR Web Application",
    description="Upload an image and get OCR results with bounding boxes and two JSON outputs.",
    fn=process_image,
    inputs="image",
    outputs=["image", gr.File(label="OCR Results JSON"), gr.File(label="Field-Value Pairs JSON")],
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()