Spaces:

Prasada
/

OCR_Demo

Sleeping

App Files Files Community

OCR_Demo / app.py

Prasada

Create app.py

ad14750 verified 2 months ago

raw

history blame

3.87 kB

	import cv2
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	import json
	from paddleocr import PaddleOCR
	import gradio as gr
	import os

	# Build the PaddleOCR engine once at import time so every request handled by
	# process_image reuses the same instance instead of constructing a new one.
	# use_angle_cls=True pairs with the cls=True argument that process_image
	# passes to ocr.ocr(); lang='en' presumably selects the English models
	# (NOTE(review): confirm both against the PaddleOCR documentation).
	ocr = PaddleOCR(use_angle_cls=True, lang='en')

	# Function to draw bounding boxes on the image
	def draw_boxes_on_image(image, data):
	    """Return an annotated RGB copy of *image* with OCR boxes and labels.

	    Args:
	        image: The picture to annotate, as a NumPy array or PIL image in RGB
	            channel order (the only caller in this file forwards the Gradio
	            upload, which is already RGB).
	        data: One page of OCR output: an iterable of
	            ``[bounding_box, (text, confidence)]`` entries where
	            ``bounding_box`` is a sequence of ``[x, y]`` corner points.
	            ``None`` or an empty sequence means "nothing detected" and
	            yields an unannotated copy.

	    Returns:
	        PIL.Image.Image: A new RGB image with every box outlined in green
	        and its ``text (confidence)`` label drawn in red above the first
	        corner.
	    """
	    # np.array() copies, so drawing never touches the caller's pixels.
	    # No BGR->RGB conversion is applied: the input is already RGB, and
	    # swapping channels here would exchange red and blue in the result.
	    pil_image = Image.fromarray(np.array(image)).convert("RGB")
	    draw = ImageDraw.Draw(pil_image)

	    # Prefer a scalable font when DejaVuSans is installed; otherwise fall
	    # back to PIL's built-in default font so drawing still works.
	    try:
	        font = ImageFont.truetype("DejaVuSans.ttf", 20)
	    except OSError:
	        font = ImageFont.load_default()

	    # `data or []` tolerates the None that PaddleOCR reports for a page
	    # without any detected text.
	    for bounding_box, (text, confidence) in data or []:
	        # Pixel coordinates must be integers for drawing.
	        box = np.array(bounding_box).astype(int)
	        corners = [tuple(point) for point in box.tolist()]

	        # Outline the box edge by edge, closing it back to the first corner.
	        for start, end in zip(corners, corners[1:] + corners[:1]):
	            draw.line([start, end], fill="green", width=2)

	        # Put the label just above the first corner, clamped so that boxes
	        # touching the top edge still get a visible label.
	        label_x, label_y = corners[0]
	        text_position = (label_x, max(label_y - 20, 0))
	        draw.text(text_position, f"{text} ({confidence:.2f})", fill="red", font=font)

	    return pil_image

	# Function to save OCR results to JSON
	def save_results_to_json(ocr_results):
	    """Flatten OCR output into a list of JSON-serializable records.

	    Despite its name this function writes nothing to disk; it only builds
	    the structure that the caller later passes to ``json.dump``.

	    Args:
	        ocr_results: Iterable of pages, each page being an iterable of
	            ``[bounding_box, (text, confidence)]`` entries. ``None`` as the
	            whole argument, or ``None``/empty pages (no text detected),
	            contribute no records.

	    Returns:
	        list[dict]: One dict per entry with the keys ``"bounding_box"``
	        (list of ``[x, y]`` float pairs), ``"text"`` and ``"confidence"``
	        (float), in the order the entries were encountered.
	    """
	    results = []

	    for page in ocr_results or []:
	        # A page without any detected text is reported as None; skip it
	        # instead of failing while trying to iterate it.
	        if not page:
	            continue
	        for word_info in page:
	            bounding_box = word_info[0]
	            text, confidence = word_info[1]
	            results.append({
	                # Coerce to built-in floats so json can serialize the
	                # values even when they arrive as NumPy scalars.
	                "bounding_box": [list(map(float, coord)) for coord in bounding_box],
	                "text": text,
	                "confidence": float(confidence),
	            })

	    return results

	# Function to identify 'field', 'value' pairs
	def identify_field_value_pairs(ocr_results, fields):
	    """Pair each known field label with the text entry that follows it.

	    An entry counts as a label when its text contains one of *fields*
	    (case-insensitive substring match; the first matching field in
	    *fields* order wins). The value is the text of the next entry on the
	    same page. This is a heuristic: short labels such as "PAN" also match
	    inside longer words, and a later occurrence of a label overwrites an
	    earlier one.

	    Args:
	        ocr_results: Iterable of pages, each page being a sequence of
	            ``[bounding_box, (text, confidence)]`` entries. ``None`` or
	            empty pages (no text detected) are skipped.
	        fields: Iterable of label strings to look for.

	    Returns:
	        dict[str, str]: Maps each found field (as spelled in *fields*) to
	        the text that follows it. Labels that are the last entry on their
	        page have no value and are omitted.
	    """
	    # Lower-case every label once rather than once per OCR entry.
	    lowered_fields = [(field, field.lower()) for field in fields]

	    field_value_pairs = {}
	    for page in ocr_results or []:
	        if not page:
	            continue
	        # enumerate() supplies the entry's real position. Searching the
	        # page with .index() would rescan it for every entry and return
	        # the first *equal* entry, which is wrong for repeated labels.
	        for position, word_info in enumerate(page):
	            lowered_text = word_info[1][0].lower()
	            for field, needle in lowered_fields:
	                if needle in lowered_text:
	                    # Assume the value comes immediately after the label.
	                    value_index = position + 1
	                    if value_index < len(page):
	                        field_value_pairs[field] = page[value_index][1][0]
	                    break
	    return field_value_pairs

	# Function to process the image and generate outputs
	def process_image(image):
	    """Run OCR on an uploaded image and build the three interface outputs.

	    Args:
	        image: The uploaded picture as delivered by the Gradio image input.

	    Returns:
	        tuple: ``(annotated_image, ocr_json_path, field_value_json_path)``
	        where the first item is the PIL image produced by
	        ``draw_boxes_on_image`` and the other two are paths of the JSON
	        files written to the current working directory.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    if image is None:
	        # Surface a readable message in the UI instead of an OCR traceback.
	        raise gr.Error("Please upload an image before submitting.")

	    ocr_results = ocr.ocr(np.array(image), cls=True) or []
	    # A page on which no text was found is reported as None; normalise it
	    # to an empty list so the helpers below can iterate safely.
	    pages = [page or [] for page in ocr_results]
	    first_page = pages[0] if pages else []

	    # The drawing helper works on a single page of entries ...
	    processed_image = draw_boxes_on_image(image, first_page)

	    # ... whereas the two helpers below iterate pages first and entries
	    # second, so they must receive the whole list of pages. Handing them
	    # a single page makes them unpack corner coordinates as
	    # (text, confidence) and fail.
	    # Save OCR results to JSON
	    results_json = save_results_to_json(pages)
	    json_path = "ocr_results.json"
	    with open(json_path, 'w') as json_file:
	        json.dump(results_json, json_file, indent=4)

	    # Identify field-value pairs
	    fields = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
	              "Mobile Number", "Email", "Address", "Bank Account Details"]
	    field_value_pairs = identify_field_value_pairs(pages, fields)
	    field_value_json_path = "field_value_pairs.json"
	    with open(field_value_json_path, 'w') as json_file:
	        json.dump(field_value_pairs, json_file, indent=4)

	    return processed_image, json_path, field_value_json_path

	# Gradio wiring: one image goes in; the annotated image and the two JSON
	# files produced by process_image come out, in that order.
	interface = gr.Interface(
	    title="OCR Web Application",
	    description="Upload an image and get OCR results with bounding boxes and two JSON outputs.",
	    fn=process_image,
	    inputs="image",
	    outputs=[
	        "image",
	        gr.File(label="OCR Results JSON"),
	        gr.File(label="Field-Value Pairs JSON"),
	    ],
	)

	# Start the web server only when this file is executed as a script, not
	# when it is imported as a module.
	if __name__ == "__main__":
	    interface.launch()