Spaces:

sachitksh123
/

paddle_ocr

Sleeping

File size: 1,148 Bytes

cb49d9f

from paddleocr import PaddleOCR
import cv2

def ocr_with_paddle(img_path):
    """Run PaddleOCR on the image at *img_path* and return its text.

    The image is loaded with OpenCV and passed to an English PaddleOCR
    engine created with ``use_angle_cls=True``. The recognized text of
    every detected line on the first result page is joined with single
    spaces.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The recognized text as one space-separated string, or ``''`` when
        nothing was detected.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from *img_path*.
    """
    # NOTE: the engine is still built on every call, exactly as before.
    ocr = PaddleOCR(lang='en', use_angle_cls=True)

    # cv2.imread reports failure by returning None rather than raising,
    # so check explicitly instead of handing None to the OCR engine.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    result = ocr.ocr(img)

    # An image without any detected text can yield an empty result or a
    # first page of None (seen in PaddleOCR 2.x releases); both mean "no text".
    if not result or result[0] is None:
        return ''

    texts = []
    for line in result[0]:
        # Each detected line is expected to look like
        # [box_points, (text, confidence)] (PaddleOCR 2.x output format).
        # The text lives at line[1][0]; iterating over the members of the
        # line instead would pick up box coordinates and drop the text.
        recognition = None
        if isinstance(line, (list, tuple)) and len(line) > 1:
            recognition = line[1]

        if isinstance(recognition, (list, tuple)) and recognition:
            texts.append(str(recognition[0]))
        else:
            # Keep the diagnostic for entries that do not match the
            # expected shape instead of failing the whole image.
            print(f"Skipping invalid word_info: {line}")

    return ' '.join(texts).strip()

# Example usage: executed only when this file is run as a script, so that
# importing the module does not trigger OCR on a hard-coded sample image.
if __name__ == "__main__":
    img_path = 'invoice-c56a1861.png'
    text = ocr_with_paddle(img_path)
    print(text)