Spaces:

ake178178
/

ocrappyh

Sleeping

App Files Files Community

ocrappyh / app.py

ake178178

Create app.py

98becde verified 3 months ago

raw

history blame

2.83 kB

	import gradio as gr
	import cv2
	import pytesseract
	import numpy as np

	# 设置 Tesseract OCR 路径（如果需要）
	# pytesseract.pytesseract.tesseract_cmd = r"/path/to/tesseract"

	def _find_paper_contour(edges):
	    """Return the first 4-vertex polygon among the 5 largest contours, or None."""
	    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
	    largest = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

	    for contour in largest:
	        # Simplify the contour with a tolerance of 2% of its perimeter.
	        epsilon = 0.02 * cv2.arcLength(contour, True)
	        approx = cv2.approxPolyDP(contour, epsilon, True)
	        if len(approx) == 4:
	            return approx
	    return None


	def _order_corners(pts):
	    """Return the 4 points of *pts* ordered TL, TR, BR, BL as a float32 array."""
	    rect = np.zeros((4, 2), dtype="float32")

	    # x + y is smallest at the top-left corner and largest at the bottom-right.
	    coord_sum = pts.sum(axis=1)
	    rect[0] = pts[np.argmin(coord_sum)]
	    rect[2] = pts[np.argmax(coord_sum)]

	    # y - x is smallest at the top-right corner and largest at the bottom-left.
	    coord_diff = np.diff(pts, axis=1)
	    rect[1] = pts[np.argmin(coord_diff)]
	    rect[3] = pts[np.argmax(coord_diff)]
	    return rect


	def preprocess_image(image):
	    """Convert an image to grayscale and, if possible, deskew the page in it.

	    The grayscale image is thresholded and edge-detected; if one of the five
	    largest contours approximates to a quadrilateral, the grayscale image is
	    perspective-warped so that quadrilateral fills the output.

	    Args:
	        image: Image array in BGR channel order, or a 2-D array that is
	            treated as already grayscale.

	    Returns:
	        The perspective-corrected grayscale image, or the unwarped grayscale
	        image when no usable quadrilateral is found.
	    """
	    if image.ndim == 2:
	        gray = image
	    else:
	        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

	    # Adaptive threshold (Gaussian, block size 11, C=2), then Canny edges.
	    thresh = cv2.adaptiveThreshold(
	        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
	    )
	    edges = cv2.Canny(thresh, 50, 150, apertureSize=3)

	    paper_contour = _find_paper_contour(edges)
	    if paper_contour is None:
	        # No quadrilateral found: fall back to the plain grayscale image.
	        return gray

	    rect = _order_corners(paper_contour.reshape(4, 2))
	    tl, tr, br, bl = rect

	    # Output size: the longer of each pair of opposite edges (Euclidean length).
	    max_width = max(int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)))
	    max_height = max(int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)))

	    if max_width < 2 or max_height < 2:
	        # Degenerate quadrilateral: the target rectangle would collapse to a
	        # line or point, so skip the warp.
	        return gray

	    dst = np.array(
	        [
	            [0, 0],
	            [max_width - 1, 0],
	            [max_width - 1, max_height - 1],
	            [0, max_height - 1],
	        ],
	        dtype="float32",
	    )

	    matrix = cv2.getPerspectiveTransform(rect, dst)
	    return cv2.warpPerspective(gray, matrix, (max_width, max_height))

	def ocr_process(image):
	    """Preprocess an uploaded image and return the text Tesseract finds in it.

	    Args:
	        image: Image array from the ``gr.Image(type="numpy")`` input, or
	            ``None`` when the request was submitted without an image.

	    Returns:
	        The recognized text (English language model), or an empty string
	        when *image* is ``None``.
	    """
	    if image is None:
	        # Nothing was uploaded; avoid failing inside OpenCV.
	        return ""

	    # The Gradio numpy image is RGB, while preprocess_image converts from
	    # BGR; swap channels so the grayscale weights apply to the right ones.
	    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

	    processed_image = preprocess_image(bgr_image)

	    return pytesseract.image_to_string(processed_image, lang="eng")

	# Build the Gradio interface: one image input routed through ocr_process,
	# with the result shown as plain text.
	iface = gr.Interface(
	    title="轻量级 OCR 应用",
	    description="上传带角度的纸张图片，自动校正并提取文字",
	    fn=ocr_process,
	    inputs=gr.Image(type="numpy"),
	    outputs="text",
	)

	# Start serving the interface.
	iface.launch()