|
import gradio as gr |
|
import cv2 |
|
import pytesseract |
|
import numpy as np |
|
|
|
|
|
|
|
|
|
def _to_gray(image):
    """Return a single-channel version of an RGB(A)/grayscale image array."""
    if image.ndim == 2:
        # Already single-channel; nothing to convert.
        return image
    channels = image.shape[2]
    if channels == 1:
        return np.ascontiguousarray(image[:, :, 0])
    if channels == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    # Gradio's numpy images are in RGB channel order (not OpenCV's BGR),
    # so the RGB conversion code is the one that applies the right weights.
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)


def _find_paper_quad(gray):
    """Return the 4 corner points (4x2) of the largest 4-sided contour, or None."""
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    edges = cv2.Canny(thresh, 50, 150, apertureSize=3)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
    # (image, contours, hierarchy) in 3.x; [-2] picks the contours in both.
    contours = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Only the five largest contours are considered as sheet candidates.
    for contour in sorted(contours, key=cv2.contourArea, reverse=True)[:5]:
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        if len(approx) == 4:
            return approx.reshape(4, 2)
    return None


def _order_corners(pts):
    """Order 4 points as top-left, top-right, bottom-right, bottom-left."""
    rect = np.zeros((4, 2), dtype="float32")

    # Top-left has the smallest x + y, bottom-right the largest.
    sums = pts.sum(axis=1)
    rect[0] = pts[np.argmin(sums)]
    rect[2] = pts[np.argmax(sums)]

    # Top-right has the smallest y - x, bottom-left the largest.
    diffs = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diffs)]
    rect[3] = pts[np.argmax(diffs)]
    return rect


def preprocess_image(image):
    """Convert an image to grayscale and straighten the sheet of paper in it.

    The image is converted to grayscale, adaptively thresholded and run
    through Canny edge detection. Among the five largest contours, the first
    one that simplifies to a quadrilateral is taken as the sheet outline and
    the grayscale image is perspective-warped onto an upright rectangle.

    Args:
        image: numpy image array. Expected in RGB channel order (as produced
            by ``gr.Image(type="numpy")``); 2-D grayscale, single-channel
            and RGBA arrays are accepted as well.

    Returns:
        The perspective-corrected grayscale image, or the plain grayscale
        image when no usable quadrilateral is found.
    """
    gray = _to_gray(image)

    quad = _find_paper_quad(gray)
    if quad is None:
        return gray

    rect = _order_corners(quad)
    # The sum/diff heuristic can assign one point to two corners (e.g. for a
    # diamond-shaped outline); such a mapping is not a valid perspective.
    if len(np.unique(rect, axis=0)) < 4:
        return gray

    tl, tr, br, bl = rect
    max_width = max(int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)))
    max_height = max(int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)))
    # A target narrower or shorter than 2 px collapses the destination
    # corners onto each other and cannot be warped meaningfully.
    if max_width < 2 or max_height < 2:
        return gray

    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(gray, matrix, (max_width, max_height))
|
|
|
def ocr_process(image):
    """Extract English text from an uploaded image.

    The image is first passed through ``preprocess_image`` and the result is
    handed to Tesseract with the ``eng`` language model.

    Args:
        image: numpy image array from the Gradio image input, or ``None``
            when the form was submitted without an image.

    Returns:
        The recognized text; an empty string when no image was provided.
    """
    # Gradio passes None for an empty image input; there is nothing to OCR,
    # so return early instead of failing inside OpenCV.
    if image is None:
        return ""

    processed_image = preprocess_image(image)
    return pytesseract.image_to_string(processed_image, lang='eng')
|
|
|
|
|
# Gradio front end: one image input (delivered to the callback as a numpy
# array), one text output showing the OCR result.
iface = gr.Interface(
    ocr_process,
    gr.Image(type="numpy"),
    "text",
    title="轻量级 OCR 应用",
    description="上传带角度的纸张图片,自动校正并提取文字",
)

# Start the web server for the interface.
iface.launch()
|
|