cdcvd committed on
Commit
d912153
1 Parent(s): afa3a48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -30
app.py CHANGED
@@ -1,20 +1,23 @@
1
  import os
2
-
3
- from PIL import Image, ImageOps, ImageChops
4
  import io
5
  import fitz # PyMuPDF
 
6
  from docx import Document
7
  from rembg import remove
8
  import gradio as gr
9
  from hezar.models import Model
10
  from ultralytics import YOLO
11
  import json
 
 
 
 
 
12
 
13
  # ایجاد دایرکتوری‌های لازم
14
  os.makedirs("static", exist_ok=True)
15
  os.makedirs("output_images", exist_ok=True)
16
 
17
-
18
  def remove_readonly(func, path, excinfo):
19
  os.chmod(path, stat.S_IWRITE)
20
  func(path)
@@ -22,11 +25,9 @@ def remove_readonly(func, path, excinfo):
22
  current_dir = os.path.dirname(os.path.abspath(__file__))
23
  ultralytics_path = os.path.join(current_dir, 'runs')
24
 
25
-
26
-
27
  if os.path.exists(ultralytics_path):
28
-
29
- shutil.rmtree(ultralytics_path, onerror=remove_readonly)
30
  def trim_whitespace(image):
31
  gray_image = ImageOps.grayscale(image)
32
  inverted_image = ImageChops.invert(gray_image)
@@ -44,23 +45,27 @@ def convert_pdf_to_images(pdf_path, zoom=2):
44
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
45
  trimmed_image = trim_whitespace(image)
46
  images.append(trimmed_image)
 
47
  return images
48
 
49
  def convert_docx_to_jpeg(docx_bytes):
50
- document = Document(BytesIO(docx_bytes))
51
  images = []
52
  for rel in document.part.rels.values():
53
  if "image" in rel.target_ref:
54
  image_stream = rel.target_part.blob
55
- image = Image.open(BytesIO(image_stream))
56
- jpeg_image = BytesIO()
57
  image.convert('RGB').save(jpeg_image, format="JPEG")
58
  jpeg_image.seek(0)
59
  images.append(Image.open(jpeg_image))
 
60
  return images
61
 
62
  def remove_background_from_image(image):
63
- return remove(image)
 
 
64
 
65
  def process_file(input_file):
66
  file_extension = os.path.splitext(input_file.name)[1].lower()
@@ -77,6 +82,7 @@ def process_file(input_file):
77
  images = convert_docx_to_jpeg(input_file.name)
78
  images = [remove_background_from_image(image) for image in images]
79
  else:
 
80
  return "File format not supported."
81
 
82
  input_folder = 'output_images'
@@ -84,14 +90,9 @@ def process_file(input_file):
84
  if img.mode == 'RGBA':
85
  img = img.convert('RGB')
86
  img.save(os.path.join(input_folder, f'image_{i}.jpg'))
87
-
88
  return images
89
 
90
-
91
- import shutil
92
-
93
-
94
-
95
  def run_detection_and_ocr():
96
  # Load models
97
  ocr_model = Model.load('hezarai/crnn-fa-printed-96-long')
@@ -100,7 +101,8 @@ def run_detection_and_ocr():
100
 
101
  input_folder = 'output_images'
102
  yolo_model_check.predict(input_folder, save=True, conf=0.5, save_crop=True)
103
-
 
104
  output_folder = 'runs/detect/predict'
105
  crop_folder = os.path.join(output_folder, 'crops')
106
 
@@ -109,7 +111,6 @@ def run_detection_and_ocr():
109
  for filename in os.listdir(input_folder):
110
  if filename.endswith('.JPEG') or filename.endswith('.jpg'):
111
  image_path = os.path.join(input_folder, filename)
112
-
113
  if os.path.exists(crop_folder):
114
  crops = []
115
  for crop_label in os.listdir(crop_folder):
@@ -130,11 +131,11 @@ def run_detection_and_ocr():
130
  'image': filename,
131
  'crops': crops
132
  })
133
-
134
  output_json_path = 'output.json'
135
  with open(output_json_path, 'w', encoding='utf-8') as f:
136
  json.dump(results, f, ensure_ascii=False, indent=4)
137
-
138
  return output_json_path
139
 
140
  def predict_text(model, image_path):
@@ -143,26 +144,48 @@ def predict_text(model, image_path):
143
  image = image.resize((320, 320))
144
  output = model.predict(image)
145
  if isinstance(output, list):
146
- return ' '.join([item['text'] for item in output])
 
 
147
  return str(output)
148
  except FileNotFoundError:
 
149
  return "N/A"
150
 
151
  def process_numbers(model, image_path):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  results = model(image_path, conf=0.5, save_crop=False)
153
  detected_objects = []
154
  for result in results[0].boxes:
155
  class_id = int(result.cls[0].cpu().numpy())
156
  label = model.names[class_id]
157
- detected_objects.append({'bbox': result.xyxy[0].cpu().numpy().tolist(), 'label': label})
 
158
  sorted_objects = sorted(detected_objects, key=lambda x: x['bbox'][0])
 
159
  return ''.join([obj['label'] for obj in sorted_objects])
160
 
161
  def gradio_interface(input_file):
162
  process_file(input_file)
163
  json_output = run_detection_and_ocr()
164
  with open(json_output, 'r', encoding='utf-8') as f:
165
- return json.load(f)
 
 
166
 
167
  iface = gr.Interface(
168
  fn=gradio_interface,
@@ -172,9 +195,5 @@ iface = gr.Interface(
172
  )
173
 
174
  if __name__ == "__main__":
175
- iface.launch()
176
-
177
-
178
-
179
-
180
-
 
1
  import os
 
 
2
  import io
3
  import fitz # PyMuPDF
4
+ from PIL import Image, ImageOps, ImageChops
5
  from docx import Document
6
  from rembg import remove
7
  import gradio as gr
8
  from hezar.models import Model
9
  from ultralytics import YOLO
10
  import json
11
+ import logging
12
+ import shutil
13
+
14
+ # تنظیمات لاگ
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
 
17
  # ایجاد دایرکتوری‌های لازم
18
  os.makedirs("static", exist_ok=True)
19
  os.makedirs("output_images", exist_ok=True)
20
 
 
21
  def remove_readonly(func, path, excinfo):
22
  os.chmod(path, stat.S_IWRITE)
23
  func(path)
 
25
  current_dir = os.path.dirname(os.path.abspath(__file__))
26
  ultralytics_path = os.path.join(current_dir, 'runs')
27
 
 
 
28
  if os.path.exists(ultralytics_path):
29
+ shutil.rmtree(ultralytics_path, onerror=remove_readonly)
30
+
31
  def trim_whitespace(image):
32
  gray_image = ImageOps.grayscale(image)
33
  inverted_image = ImageChops.invert(gray_image)
 
45
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
46
  trimmed_image = trim_whitespace(image)
47
  images.append(trimmed_image)
48
+ logging.info(f"Converted PDF {pdf_path} to images.")
49
  return images
50
 
51
  def convert_docx_to_jpeg(docx_bytes):
52
+ document = Document(io.BytesIO(docx_bytes))
53
  images = []
54
  for rel in document.part.rels.values():
55
  if "image" in rel.target_ref:
56
  image_stream = rel.target_part.blob
57
+ image = Image.open(io.BytesIO(image_stream))
58
+ jpeg_image = io.BytesIO()
59
  image.convert('RGB').save(jpeg_image, format="JPEG")
60
  jpeg_image.seek(0)
61
  images.append(Image.open(jpeg_image))
62
+ logging.info("Converted DOCX to images.")
63
  return images
64
 
65
  def remove_background_from_image(image):
66
+ result = remove(image)
67
+ logging.info("Removed background from image.")
68
+ return result
69
 
70
  def process_file(input_file):
71
  file_extension = os.path.splitext(input_file.name)[1].lower()
 
82
  images = convert_docx_to_jpeg(input_file.name)
83
  images = [remove_background_from_image(image) for image in images]
84
  else:
85
+ logging.error("File format not supported.")
86
  return "File format not supported."
87
 
88
  input_folder = 'output_images'
 
90
  if img.mode == 'RGBA':
91
  img = img.convert('RGB')
92
  img.save(os.path.join(input_folder, f'image_{i}.jpg'))
93
+ logging.info("Processed file and saved images.")
94
  return images
95
 
 
 
 
 
 
96
  def run_detection_and_ocr():
97
  # Load models
98
  ocr_model = Model.load('hezarai/crnn-fa-printed-96-long')
 
101
 
102
  input_folder = 'output_images'
103
  yolo_model_check.predict(input_folder, save=True, conf=0.5, save_crop=True)
104
+ logging.info("Ran YOLO detection for check model.")
105
+
106
  output_folder = 'runs/detect/predict'
107
  crop_folder = os.path.join(output_folder, 'crops')
108
 
 
111
  for filename in os.listdir(input_folder):
112
  if filename.endswith('.JPEG') or filename.endswith('.jpg'):
113
  image_path = os.path.join(input_folder, filename)
 
114
  if os.path.exists(crop_folder):
115
  crops = []
116
  for crop_label in os.listdir(crop_folder):
 
131
  'image': filename,
132
  'crops': crops
133
  })
134
+ logging.info("Processed detection and OCR.")
135
  output_json_path = 'output.json'
136
  with open(output_json_path, 'w', encoding='utf-8') as f:
137
  json.dump(results, f, ensure_ascii=False, indent=4)
138
+ logging.info("Saved results to JSON.")
139
  return output_json_path
140
 
141
  def predict_text(model, image_path):
 
144
  image = image.resize((320, 320))
145
  output = model.predict(image)
146
  if isinstance(output, list):
147
+ result = ' '.join([item['text'] for item in output])
148
+ logging.info(f"Predicted text for {image_path}.")
149
+ return result
150
  return str(output)
151
  except FileNotFoundError:
152
+ logging.error(f"File not found: {image_path}.")
153
  return "N/A"
154
 
155
  def process_numbers(model, image_path):
156
+ label_map = {
157
+ '-': '/',
158
+ '0': '0',
159
+ '1': '1',
160
+ '2': '2',
161
+ '3': '3',
162
+ '4': '4',
163
+ '4q': '4',
164
+ '5': '5',
165
+ '6': '6',
166
+ '6q': '6',
167
+ '7': '7',
168
+ '8': '8',
169
+ '9': '9'
170
+ }
171
  results = model(image_path, conf=0.5, save_crop=False)
172
  detected_objects = []
173
  for result in results[0].boxes:
174
  class_id = int(result.cls[0].cpu().numpy())
175
  label = model.names[class_id]
176
+ mapped_label = label_map.get(label, '')
177
+ detected_objects.append({'bbox': result.xyxy[0].cpu().numpy().tolist(), 'label': mapped_label})
178
  sorted_objects = sorted(detected_objects, key=lambda x: x['bbox'][0])
179
+ logging.info(f"Processed numbers for {image_path}.")
180
  return ''.join([obj['label'] for obj in sorted_objects])
181
 
182
  def gradio_interface(input_file):
183
  process_file(input_file)
184
  json_output = run_detection_and_ocr()
185
  with open(json_output, 'r', encoding='utf-8') as f:
186
+ data = json.load(f)
187
+ logging.info("Generated JSON output for Gradio interface.")
188
+ return data
189
 
190
  iface = gr.Interface(
191
  fn=gradio_interface,
 
195
  )
196
 
197
  if __name__ == "__main__":
198
+ logging.info("Starting Gradio interface.")
199
+ iface.launch()