MahmoudAbdelmaged committed on
Commit
1764879
·
verified ·
1 Parent(s): a2d4ad8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -39
app.py CHANGED
@@ -1,47 +1,55 @@
1
- import gradio as gr
2
- import torch
3
- from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor
 
 
4
  import easyocr
5
- import os
6
-
7
# Persistent cache directory for the EasyOCR model files.
# NOTE(review): EasyOCR appears to resolve its default model directory when
# the package is imported, and not from an "EASYOCR_HOME" variable, so setting
# that variable after `import easyocr` does not move the cache.  The directory
# is therefore passed to the Reader explicitly; the environment variable is
# still exported so anything else that reads it keeps working.
EASYOCR_CACHE_DIR = '/root/.easyocr'
os.environ['EASYOCR_HOME'] = EASYOCR_CACHE_DIR

# Single source of truth for the checkpoint used by both model and processor.
LAYOUTLM_MODEL_ID = "jordyvl/EElayoutlmv3_jordyvl_rvl_cdip_easyocr_2023-07-09_weighted"

# Pre-download the EasyOCR detection model
print("Downloading EasyOCR detection model...")
reader = easyocr.Reader(
    ['en'],
    model_storage_directory=EASYOCR_CACHE_DIR,
    download_enabled=True,
)
print("EasyOCR detection model downloaded successfully.")

# Load the LayoutLMv3 model and processor
print("Loading LayoutLMv3 model...")
model = LayoutLMv3ForTokenClassification.from_pretrained(LAYOUTLM_MODEL_ID)
processor = LayoutLMv3Processor.from_pretrained(LAYOUTLM_MODEL_ID)
print("LayoutLMv3 model loaded successfully.")
20
-
21
def process_image(image):
    """Run EasyOCR and LayoutLMv3 on an uploaded image and return raw logits.

    Args:
        image: PIL image supplied by the Gradio ``Image(type="pil")`` input,
            or ``None`` when no image has been provided.

    Returns:
        A NumPy array holding the model's logits, or ``None`` when there is
        no image or OCR finds no text to feed to the model.
    """
    # Gradio passes None when the input is empty or cleared.
    if image is None:
        return None

    # Local import: the file's top-level imports do not include numpy.
    import numpy as np

    image = image.convert("RGB")
    width, height = image.size

    # Perform OCR on the input image.  EasyOCR takes a path, bytes or an
    # OpenCV-style (BGR) array rather than a PIL image, so convert first.
    print("Performing OCR...")
    bgr_pixels = np.ascontiguousarray(np.asarray(image)[:, :, ::-1])
    ocr_result = reader.readtext(bgr_pixels)
    print("OCR completed. Results:", ocr_result)

    # Split the (corner points, text, confidence) tuples into the words and
    # 0-1000 normalised [x0, y0, x1, y1] boxes that LayoutLMv3 expects.
    words = []
    boxes = []
    for corners, text, _confidence in ocr_result:
        xs = [point[0] for point in corners]
        ys = [point[1] for point in corners]
        raw_box = (
            1000 * min(xs) / width,
            1000 * min(ys) / height,
            1000 * max(xs) / width,
            1000 * max(ys) / height,
        )
        boxes.append([max(0, min(1000, int(value))) for value in raw_box])
        words.append(text)

    if not words:
        print("Processing completed. Returning results...")
        return None

    # Process the image and OCR result with LayoutLMv3.  The tokenizer and
    # image processor are called separately so the processor's built-in OCR
    # is bypassed in favour of the EasyOCR output.
    print("Processing image with LayoutLMv3...")
    inputs = processor.tokenizer(
        words, boxes=boxes, truncation=True, return_tensors="pt"
    )
    pixel_features = processor.image_processor(
        image, apply_ocr=False, return_tensors="pt"
    )
    inputs["pixel_values"] = pixel_features["pixel_values"]

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits

    # Return logits for debugging or further processing
    print("Processing completed. Returning results...")
    return logits.detach().cpu().numpy()
 
 
 
36
 
37
# Gradio UI: one PIL image in, the model output rendered as text.
_interface_options = {
    "fn": process_image,
    "inputs": gr.Image(type="pil"),
    "outputs": gr.Textbox(label="Model Output"),
    # Re-run on every input change (handy while debugging).
    "live": True,
}
interface = gr.Interface(**_interface_options)

# Start serving the app.
print("Launching the Gradio interface...")
interface.launch()
 
1
# Required third-party packages: easyocr, opencv-python, gradio.
# Install them outside this script (for example by listing them in
# requirements.txt).  The notebook-only "!pip install ..." form is a
# SyntaxError when this file is run as a plain Python script.

# Import required libraries
import cv2
import easyocr
import numpy as np
import gradio as gr
9
+
10
# EasyOCR reader shared by all requests; created lazily on first use because
# building a Reader loads the OCR models.
_OCR_READER = None


def _get_ocr_reader():
    """Return the shared English/Arabic EasyOCR reader, creating it once."""
    global _OCR_READER
    if _OCR_READER is None:
        _OCR_READER = easyocr.Reader(['en', 'ar'], gpu=False)
    return _OCR_READER


# Function to process the uploaded image and extract text
def extract_text_from_image(image, *, conf_threshold=0.2):
    """Detect text in an uploaded image and draw the detections on a copy.

    Args:
        image: Array supplied by the Gradio ``Image(type="numpy")`` input,
            or ``None`` when nothing was uploaded.
        conf_threshold: Detections with a confidence at or below this value
            are not drawn (they are still included in the returned results).

    Returns:
        A ``(annotated_image, results)`` tuple.  ``annotated_image`` is an
        RGB array with boxes and text overlaid, and ``results`` is the
        unfiltered list of ``(bbox, text, confidence)`` tuples from EasyOCR.
        Returns ``(None, [])`` when ``image`` is ``None``.
    """
    # Gradio passes None when the form is submitted without an image.
    if image is None:
        return None, []

    # Gradio hands over RGB data while OpenCV works in BGR.  Converting in
    # memory keeps the colours right and avoids a lossy round-trip through a
    # shared file on disk.  cvtColor returns a new array, so the caller's
    # image is never drawn on.
    if image.ndim == 2:
        img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] == 4:
        img = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    else:
        img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Perform text detection
    results = _get_ocr_reader().readtext(img)

    # Draw bounding boxes and overlay text on the image
    for bbox, text, conf in results:
        if conf <= conf_threshold:
            continue

        # Get coordinates (first corner is top-left, third is bottom-right)
        top_left = tuple(map(int, bbox[0]))
        bottom_right = tuple(map(int, bbox[2]))

        # Draw rectangle and text (colours are BGR here).
        # NOTE: OpenCV's Hershey fonts cannot render non-ASCII text such as
        # Arabic; unsupported characters are drawn as question marks.
        cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 2)
        cv2.putText(img, text, top_left, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    # Convert the image to RGB (Gradio requires RGB format)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    return img, results
41
 
42
# Gradio UI: one uploaded image in; the annotated image and OCR results out.
_upload_input = gr.Image(type="numpy", label="Upload Image")
_result_outputs = [
    gr.Image(type="numpy", label="Processed Image"),
    gr.Text(label="Extracted Text"),
]
interface = gr.Interface(
    fn=extract_text_from_image,
    inputs=_upload_input,
    outputs=_result_outputs,
    title="Image Text Extractor",
    description="Upload an image to extract text using EasyOCR.",
)

# Start serving the app.
interface.launch()