File size: 1,148 Bytes
cb49d9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from paddleocr import PaddleOCR
import cv2

def ocr_with_paddle(img_path):
    """Run PaddleOCR on an image file and return the recognized text.

    Args:
        img_path: Path of the image file, loaded with ``cv2.imread``.

    Returns:
        The recognized text of every detected line, joined by single
        spaces and stripped of surrounding whitespace. An empty string is
        returned when the OCR result contains no detections.

    Raises:
        OSError: If ``cv2.imread`` returns ``None`` for ``img_path``
            (the file is missing or could not be decoded).
    """
    # Load the image first so an unreadable path fails before the OCR
    # models are constructed.
    img = cv2.imread(img_path)
    if img is None:
        raise OSError(f"Could not read image: {img_path!r}")

    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # The result holds one entry per input image; that entry is treated as
    # "no text" when it is None or empty instead of being iterated blindly.
    if not result or not result[0]:
        return ''

    texts = []
    for line in result[0]:
        # Assumes the PaddleOCR 2.x layout, where each detected line is
        # [bounding_box, (text, confidence)] -- confirm against the installed
        # version. The text therefore lives at line[1][0]; iterating over
        # `line` itself would mix box coordinates in with the recognition.
        recognition = line[1] if len(line) > 1 else None
        if not isinstance(recognition, (list, tuple)) or not recognition:
            # Unexpected shape: report it and keep going rather than crash.
            print(f"Skipping invalid word_info: {line}")
            continue
        texts.append(str(recognition[0]))

    return ' '.join(texts).strip()

# Example usage: guarded so that it runs only when this file is executed as a
# script; importing this module no longer triggers OCR on the sample image.
if __name__ == "__main__":
    img_path = 'invoice-c56a1861.png'
    text = ocr_with_paddle(img_path)
    print(text)