ocrappyh / app.py
ake178178's picture
Create app.py
98becde verified
raw
history blame
2.83 kB
import gradio as gr
import cv2
import pytesseract
import numpy as np
# Set the path to the Tesseract OCR executable (only if it is not on PATH)
# pytesseract.pytesseract.tesseract_cmd = r"/path/to/tesseract"
def _to_gray(image):
    """Return a single-channel grayscale version of an RGB(A) or gray image."""
    if image.ndim == 2:
        # Already single-channel; cvtColor would reject it.
        return image
    if image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    # The Gradio image component used by this app delivers RGB, not OpenCV's
    # native BGR, so the RGB conversion code gives the correct luminance.
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)


def _find_paper_quad(contours):
    """Return the first 4-vertex polygon among the 5 largest contours, or None."""
    largest = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    for contour in largest:
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        if len(approx) == 4:
            return approx
    return None


def _order_corners(pts):
    """Order four (x, y) points as top-left, top-right, bottom-right, bottom-left."""
    rect = np.zeros((4, 2), dtype="float32")
    # x + y is smallest at the top-left corner and largest at the bottom-right.
    coord_sum = pts.sum(axis=1)
    rect[0] = pts[np.argmin(coord_sum)]
    rect[2] = pts[np.argmax(coord_sum)]
    # y - x is smallest at the top-right corner and largest at the bottom-left.
    coord_diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(coord_diff)]
    rect[3] = pts[np.argmax(coord_diff)]
    return rect


def preprocess_image(image):
    """Deskew a photographed sheet of paper and return it in grayscale.

    The image is converted to grayscale, binarised with an adaptive
    threshold, and edge-detected. Among the five largest contours, the first
    one that simplifies to a quadrilateral is treated as the sheet and is
    perspective-warped to an upright rectangle.

    Args:
        image: NumPy image array; either 2-D grayscale or 3-D with 3 (RGB)
            or 4 (RGBA) channels. Assumed to be 8-bit, as required by
            ``cv2.adaptiveThreshold``.

    Returns:
        The perspective-corrected grayscale image, or the plain grayscale
        image when no usable quadrilateral is found.
    """
    gray = _to_gray(image)

    # Adaptive thresholding copes with uneven lighting before edge detection.
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    edges = cv2.Canny(thresh, 50, 150, apertureSize=3)

    # [-2] selects the contour list on both OpenCV 3.x (3 return values)
    # and OpenCV 4.x (2 return values).
    contours = cv2.findContours(
        edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    paper_contour = _find_paper_quad(contours)
    if paper_contour is None:
        # No quadrilateral found: fall back to the unwarped grayscale image.
        return gray

    rect = _order_corners(paper_contour.reshape(4, 2))
    tl, tr, br, bl = rect

    # Target size: the longer of each pair of opposite edges.
    max_width = max(int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)))
    max_height = max(int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)))
    if max_width < 1 or max_height < 1:
        # Degenerate quadrilateral; warping to an empty canvas would raise.
        return gray

    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    transform = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(gray, transform, (max_width, max_height))
def ocr_process(image):
    """Run OCR on an uploaded image and return the recognised text.

    Args:
        image: NumPy image array from the UI, or ``None`` when the request
            was submitted without an image.

    Returns:
        The text recognised by Tesseract (English language model), or an
        empty string when no image was supplied.
    """
    if image is None:
        # Nothing to recognise; avoid passing None into the OpenCV pipeline.
        return ""
    # Deskew / grayscale the image first to give Tesseract a cleaner input.
    processed_image = preprocess_image(image)
    return pytesseract.image_to_string(processed_image, lang='eng')
# Build the Gradio web UI: one image input (delivered to the callback as a
# NumPy array) and one plain-text output showing the OCR result.
iface = gr.Interface(
    ocr_process,
    gr.Image(type="numpy"),
    "text",
    title="轻量级 OCR 应用",
    description="上传带角度的纸张图片,自动校正并提取文字",
)

# Start the web server for the interface.
iface.launch()