sachitksh123 committed on
Commit
cb49d9f
·
verified ·
1 Parent(s): 8de1162

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from paddleocr import PaddleOCR
2
+ import cv2
3
+
4
def ocr_with_paddle(img_path):
    """Run PaddleOCR on an image file and return the recognized text.

    Args:
        img_path: Path to an image file that OpenCV can load.

    Returns:
        The recognized text fragments joined by single spaces, in the order
        PaddleOCR reports them. An empty string when no text is detected.

    Raises:
        ValueError: If OpenCV cannot load an image from ``img_path``.
    """
    # NOTE: the OCR engine is constructed on every call, as in the original.
    ocr = PaddleOCR(lang='en', use_angle_cls=True)

    # cv2.imread reports failure by returning None instead of raising, so
    # check explicitly to fail with a clear message rather than inside OCR.
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(
            f"Could not read image (missing file or unsupported format): "
            f"{img_path}"
        )

    # Run OCR on the image
    result = ocr.ocr(img)

    # The result has one entry per input image; that entry may be None/empty
    # when no text was detected.
    if not result or not result[0]:
        return ''

    fragments = []
    for detection in result[0]:
        # Expected PaddleOCR 2.x layout: [bounding_box, (text, confidence)].
        # The text is the first element of the SECOND item; the first item
        # is the box, whose members are coordinates, not text.
        recognition = detection[1] if len(detection) > 1 else None
        if isinstance(recognition, (list, tuple)) and recognition:
            fragments.append(str(recognition[0]))
        else:
            # Unexpected shape: report it and keep going (best effort).
            print(f"Skipping invalid word_info: {detection}")

    return ' '.join(fragments).strip()  # return the cleaned final text
27
+
28
# Example usage: OCR a sample invoice image and print the recognized text.
# Guarded so that importing this module does not trigger an OCR run on a
# hard-coded file; running the file as a script behaves as before.
if __name__ == "__main__":
    img_path = 'invoice-c56a1861.png'
    text = ocr_with_paddle(img_path)
    print(text)