Spaces:
Sleeping
Sleeping
from paddleocr import PaddleOCR | |
import cv2 | |
def ocr_with_paddle(img_path):
    """Run PaddleOCR on an image file and return the recognized text.

    Args:
        img_path: Path of the image file; it is loaded with ``cv2.imread``.

    Returns:
        The recognized text of every detected line, joined by single spaces
        and stripped of leading/trailing whitespace. An empty string is
        returned when no text is detected.

    Raises:
        OSError: If OpenCV cannot load the image (``cv2.imread`` returned
            ``None``, e.g. the file is missing or is not a readable image).
    """
    # Load the image first so a bad path fails fast, before the (expensive)
    # OCR models are initialised.
    img = cv2.imread(img_path)
    if img is None:
        raise OSError(f"Could not read image: {img_path!r}")

    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # ``result`` holds one entry per input image; that entry is None when
    # nothing was detected on the image.
    if not result or result[0] is None:
        return ''

    texts = []
    for line in result[0]:
        # Per the PaddleOCR 2.x result layout each detected line is
        # [bounding_box, (text, confidence)]; the text is therefore
        # line[1][0]. Iterating over the members of ``line`` would pick up
        # box coordinates instead of the recognized text.
        rec = line[1] if isinstance(line, (list, tuple)) and len(line) > 1 else None
        if not isinstance(rec, (list, tuple)) or not rec:
            print(f"Skipping invalid word_info: {line}")
            continue
        texts.append(str(rec[0]))

    return ' '.join(texts).strip()
# Example usage: OCR a sample invoice image and print the recognized text.
img_path = 'invoice-c56a1861.png'
text = ocr_with_paddle(img_path=img_path)
print(text)