Update app.py
app.py CHANGED
@@ -1,20 +1,23 @@
 import os
-
-from PIL import Image, ImageOps, ImageChops
 import io
 import fitz  # PyMuPDF
+from PIL import Image, ImageOps, ImageChops
 from docx import Document
 from rembg import remove
 import gradio as gr
 from hezar.models import Model
 from ultralytics import YOLO
 import json
+import logging
+import shutil
+
+# Logging settings
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # Create the required directories
 os.makedirs("static", exist_ok=True)
 os.makedirs("output_images", exist_ok=True)
 
-
 def remove_readonly(func, path, excinfo):
     os.chmod(path, stat.S_IWRITE)
     func(path)
@@ -22,11 +25,9 @@ def remove_readonly(func, path, excinfo):
 current_dir = os.path.dirname(os.path.abspath(__file__))
 ultralytics_path = os.path.join(current_dir, 'runs')
 
-
-
 if os.path.exists(ultralytics_path):
-
-
+    shutil.rmtree(ultralytics_path, onerror=remove_readonly)
+
 def trim_whitespace(image):
     gray_image = ImageOps.grayscale(image)
     inverted_image = ImageChops.invert(gray_image)
@@ -44,23 +45,27 @@ def convert_pdf_to_images(pdf_path, zoom=2):
         image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
         trimmed_image = trim_whitespace(image)
         images.append(trimmed_image)
+    logging.info(f"Converted PDF {pdf_path} to images.")
     return images
 
 def convert_docx_to_jpeg(docx_bytes):
-    document = Document(BytesIO(docx_bytes))
+    document = Document(io.BytesIO(docx_bytes))
     images = []
     for rel in document.part.rels.values():
         if "image" in rel.target_ref:
             image_stream = rel.target_part.blob
-            image = Image.open(BytesIO(image_stream))
-            jpeg_image = BytesIO()
+            image = Image.open(io.BytesIO(image_stream))
+            jpeg_image = io.BytesIO()
             image.convert('RGB').save(jpeg_image, format="JPEG")
             jpeg_image.seek(0)
             images.append(Image.open(jpeg_image))
+    logging.info("Converted DOCX to images.")
     return images
 
 def remove_background_from_image(image):
-
+    result = remove(image)
+    logging.info("Removed background from image.")
+    return result
 
 def process_file(input_file):
     file_extension = os.path.splitext(input_file.name)[1].lower()
@@ -77,6 +82,7 @@ def process_file(input_file):
         images = convert_docx_to_jpeg(input_file.name)
         images = [remove_background_from_image(image) for image in images]
     else:
+        logging.error("File format not supported.")
         return "File format not supported."
 
     input_folder = 'output_images'
@@ -84,14 +90,9 @@ def process_file(input_file):
         if img.mode == 'RGBA':
             img = img.convert('RGB')
         img.save(os.path.join(input_folder, f'image_{i}.jpg'))
-
+    logging.info("Processed file and saved images.")
     return images
 
-
-import shutil
-
-
-
 def run_detection_and_ocr():
     # Load models
     ocr_model = Model.load('hezarai/crnn-fa-printed-96-long')
@@ -100,7 +101,8 @@ def run_detection_and_ocr():
 
     input_folder = 'output_images'
     yolo_model_check.predict(input_folder, save=True, conf=0.5, save_crop=True)
-
+    logging.info("Ran YOLO detection for check model.")
+
     output_folder = 'runs/detect/predict'
     crop_folder = os.path.join(output_folder, 'crops')
 
@@ -109,7 +111,6 @@ def run_detection_and_ocr():
     for filename in os.listdir(input_folder):
         if filename.endswith('.JPEG') or filename.endswith('.jpg'):
             image_path = os.path.join(input_folder, filename)
-
             if os.path.exists(crop_folder):
                 crops = []
                 for crop_label in os.listdir(crop_folder):
@@ -130,11 +131,11 @@ def run_detection_and_ocr():
                     'image': filename,
                     'crops': crops
                 })
-
+    logging.info("Processed detection and OCR.")
     output_json_path = 'output.json'
    with open(output_json_path, 'w', encoding='utf-8') as f:
         json.dump(results, f, ensure_ascii=False, indent=4)
-
+    logging.info("Saved results to JSON.")
     return output_json_path
 
 def predict_text(model, image_path):
@@ -143,26 +144,48 @@ def predict_text(model, image_path):
         image = image.resize((320, 320))
         output = model.predict(image)
         if isinstance(output, list):
-
+            result = ' '.join([item['text'] for item in output])
+            logging.info(f"Predicted text for {image_path}.")
+            return result
         return str(output)
     except FileNotFoundError:
+        logging.error(f"File not found: {image_path}.")
         return "N/A"
 
 def process_numbers(model, image_path):
+    label_map = {
+        '-': '/',
+        '0': '0',
+        '1': '1',
+        '2': '2',
+        '3': '3',
+        '4': '4',
+        '4q': '4',
+        '5': '5',
+        '6': '6',
+        '6q': '6',
+        '7': '7',
+        '8': '8',
+        '9': '9'
+    }
     results = model(image_path, conf=0.5, save_crop=False)
     detected_objects = []
     for result in results[0].boxes:
         class_id = int(result.cls[0].cpu().numpy())
         label = model.names[class_id]
-
+        mapped_label = label_map.get(label, '')
+        detected_objects.append({'bbox': result.xyxy[0].cpu().numpy().tolist(), 'label': mapped_label})
     sorted_objects = sorted(detected_objects, key=lambda x: x['bbox'][0])
+    logging.info(f"Processed numbers for {image_path}.")
    return ''.join([obj['label'] for obj in sorted_objects])
 
 def gradio_interface(input_file):
     process_file(input_file)
     json_output = run_detection_and_ocr()
     with open(json_output, 'r', encoding='utf-8') as f:
-
+        data = json.load(f)
+    logging.info("Generated JSON output for Gradio interface.")
+    return data
 
 iface = gr.Interface(
     fn=gradio_interface,
@@ -172,9 +195,5 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-
-
-
-
-
-
+    logging.info("Starting Gradio interface.")
+    iface.launch()
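For reference, the digit-assembly step added to process_numbers can be checked in isolation. The sketch below uses made-up detections; in app.py the bounding boxes and labels come from results[0].boxes of the YOLO model, so the values here are illustrative only.

# Same mapping as the one introduced in the commit.
label_map = {
    '-': '/', '0': '0', '1': '1', '2': '2', '3': '3', '4': '4', '4q': '4',
    '5': '5', '6': '6', '6q': '6', '7': '7', '8': '8', '9': '9'
}

# Made-up detections: bounding box [x1, y1, x2, y2] plus the raw class label.
mock_detections = [
    {'bbox': [120.0, 10.0, 150.0, 40.0], 'label': '4q'},
    {'bbox': [30.0, 10.0, 60.0, 40.0], 'label': '1'},
    {'bbox': [75.0, 10.0, 105.0, 40.0], 'label': '-'},
]

# Map raw labels to characters (unknown labels become ''), sort left to right
# by x1, and join, mirroring the steps added in the commit.
mapped = [{'bbox': d['bbox'], 'label': label_map.get(d['label'], '')} for d in mock_detections]
ordered = sorted(mapped, key=lambda x: x['bbox'][0])
print(''.join(obj['label'] for obj in ordered))  # prints "1/4"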
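With logging configured at import time, a quick local smoke test can exercise the whole pipeline without opening the web UI. This is an assumption-laden sketch, not part of the commit: sample.pdf is a placeholder path, SimpleNamespace stands in for the file object Gradio normally passes (only its .name attribute is read by process_file), and it only runs if the hezar and YOLO models referenced in app.py are available locally.

from types import SimpleNamespace

from app import gradio_interface  # assumes the file above is saved as app.py and importable

# Placeholder upload object; Gradio's file component passes something similar
# whose .name points at the uploaded file on disk.
fake_upload = SimpleNamespace(name="sample.pdf")  # hypothetical path

# Runs process_file, then YOLO detection + OCR, and returns the parsed output.json.
data = gradio_interface(fake_upload)
print(data)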