Spaces:

sachitksh123
/

paddle_ocr

Sleeping

sachitksh123 committed on Nov 6, 2024

Commit

cb49d9f

verified ·

1 Parent(s): 8de1162

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+from paddleocr import PaddleOCR
+import cv2
+def ocr_with_paddle(img_path):
+    finaltext = ''
+    ocr = PaddleOCR(lang='en', use_angle_cls=True)
+    # Read the image using OpenCV (this is just one way of loading the image)
+    img = cv2.imread(img_path)
+    # Run OCR on the image
+    result = ocr.ocr(img)
+    # Extract text from the OCR result
+    for line in result[0]:  # iterate through the detected lines
+        for word_info in line:
+            # Check if word_info[1] is a list (the first element of word_info should be the text)
+            if isinstance(word_info[1], list):
+                text = word_info[1][0]
+                text=str(text) # The recognized text is in the second element (index 1)
+                finaltext += text + ' '  # Append each detected word followed by a space
+            else:
+                # If word_info[1] is not a list (e.g., if it's a float), skip or handle the case
+                print(f"Skipping invalid word_info: {word_info}")
+    return finaltext.strip()  # return the cleaned final text
+# Example usage: run OCR on a sample image and print the text it returns.
+# This runs at module level, so it executes whenever the file is run or imported.
+img_path = 'invoice-c56a1861.png'
+text = ocr_with_paddle(img_path)
+print(text)