Scezui committed
Commit fd03164
1 Parent(s): 5bc4c7c

fixed bugs in create_csv

Files changed (36)
  1. Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc +0 -0
  2. Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc +0 -0
  3. Layoutlmv3_inference/annotate_image.py +1 -1
  4. app.py +46 -49
  5. experiment.ipynb +0 -1337
  6. inferenced/csv_files/Output_0.csv +0 -4
  7. inferenced/csv_files/Output_1.csv +0 -2
  8. inferenced/csv_files/Output_2.csv +0 -3
  9. inferenced/csv_files/Output_3.csv +0 -2
  10. inferenced/csv_files/Output_4.csv +0 -2
  11. inferenced/output.csv +0 -9
  12. inferenced/sample1_711_inference.jpg +0 -0
  13. inferenced/sample1_grace_inference.jpg +0 -0
  14. inferenced/sample_711_inference.jpg +0 -0
  15. inferenced/sample_coop_inference.jpg +0 -0
  16. inferenced/sample_grace_inference.jpg +0 -0
  17. log/error_output.log +18 -0
  18. static/inference/Layoutlmv3_inference/__init__.py +0 -0
  19. static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-310.pyc +0 -0
  20. static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-311.pyc +0 -0
  21. static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-312.pyc +0 -0
  22. static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-310.pyc +0 -0
  23. static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc +0 -0
  24. static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-310.pyc +0 -0
  25. static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc +0 -0
  26. static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-310.pyc +0 -0
  27. static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-311.pyc +0 -0
  28. static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-312.pyc +0 -0
  29. static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-310.pyc +0 -0
  30. static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-311.pyc +0 -0
  31. static/inference/Layoutlmv3_inference/annotate_image.py +0 -56
  32. static/inference/Layoutlmv3_inference/inference_handler.py +0 -199
  33. static/inference/Layoutlmv3_inference/ocr.py +0 -187
  34. static/inference/Layoutlmv3_inference/utils.py +0 -50
  35. static/inference/preprocess.py +0 -206
  36. static/inference/run_inference.py +0 -27
Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc CHANGED
Binary files a/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc and b/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc differ
 
Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc CHANGED
Binary files a/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc and b/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc differ
 
Layoutlmv3_inference/annotate_image.py CHANGED
@@ -50,7 +50,7 @@ def annotate_image(image_path, annotation_object):
 
     image_name = os.path.basename(image_path)
     image_name = image_name[:image_name.find('.')]
-    output_folder = 'inferenced/'
+    output_folder = 'static/temp/inferenced/'
    os.makedirs(output_folder, exist_ok=True)
 
    img.save(os.path.join(output_folder, f'{image_name}_inference.jpg'))
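For orientation, a minimal sketch of where annotated images land after this change, assuming a hypothetical input filename (the path below is illustrative, not taken from the repository):

    import os

    # Hypothetical input path, for illustration only.
    image_path = 'static/temp/sample_711.jpg'

    image_name = os.path.basename(image_path)        # 'sample_711.jpg'
    image_name = image_name[:image_name.find('.')]   # 'sample_711'

    output_folder = 'static/temp/inferenced/'        # new location introduced by this commit
    os.makedirs(output_folder, exist_ok=True)

    print(os.path.join(output_folder, f'{image_name}_inference.jpg'))
    # -> static/temp/inferenced/sample_711_inference.jpg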
app.py CHANGED
@@ -60,7 +60,7 @@ def index():
 
     # Source folders
     temp_folder = r'static/temp'
-    inferenced_folder = r'inferenced'
+    inferenced_folder = r'static/temp/inferenced'
 
     # Destination folder path
     destination_folder = os.path.join('output_folders', dt_string) # Create a new folder with timestamp
@@ -104,8 +104,8 @@ def make_predictions(image_paths):
     temp = None
     try:
         # For Windows OS
-        # temp = pathlib.PosixPath # Save the original state
-        # pathlib.PosixPath = pathlib.WindowsPath # Change to WindowsPath temporarily
+        temp = pathlib.PosixPath # Save the original state
+        pathlib.PosixPath = pathlib.WindowsPath # Change to WindowsPath temporarily
 
         model_path = Path(r'model/export')
         learner = load_learner(model_path)
@@ -129,8 +129,8 @@ def make_predictions(image_paths):
     except Exception as e:
         return {"error in make_predictions": str(e)}
 
-    # finally:
-    #     pathlib.PosixPath = temp
+    finally:
+        pathlib.PosixPath = temp
 
 import copy
 @app.route('/predict/<filenames>', methods=['GET', 'POST'])
@@ -181,7 +181,7 @@ def predict_files(filenames):
 @app.route('/get_inference_image')
 def get_inference_image():
     # Assuming the new image is stored in the 'inferenced' folder with the name 'temp_inference.jpg'
-    inferenced_image = 'inferenced/temp_inference.jpg'
+    inferenced_image = 'static/temp/inferenced/temp_inference.jpg'
     return jsonify(updatedImagePath=inferenced_image), 200 # Return the image path with a 200 status code
 
 
@@ -231,7 +231,6 @@ def replace_symbols_with_period(value):
     return value.replace(',', '.')
 
 
-from itertools import zip_longest
 
 @app.route('/create_csv', methods=['GET'])
 def create_csv():
@@ -240,11 +239,14 @@ def create_csv():
         json_folder_path = r"static/temp/labeled" # Change this to your folder path
 
         # Path to the output CSV folder
-        output_folder_path = r"inferenced/csv_files"
+        output_folder_path = r"static/temp/inferenced/csv_files"
         os.makedirs(output_folder_path, exist_ok=True)
 
-        # Initialize an empty list to store all JSON data
-        all_data = []
+        column_order = [
+            'RECEIPTNUMBER', 'MERCHANTNAME', 'MERCHANTADDRESS',
+            'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
+            'PRICE', 'TOTAL', 'VATTAX'
+        ]
 
         # Iterate through JSON files in the folder
         for filename in os.listdir(json_folder_path):
@@ -253,57 +255,39 @@ def create_csv():
 
                 with open(json_file_path, 'r') as file:
                     data = json.load(file)
-                    all_data.extend(data['output'])
+                    all_data = data.get('output', [])
 
-                    # Creating a dictionary to store labels and corresponding texts for this JSON file
+                    # Initialize a dictionary to store labels and corresponding texts for this JSON file
                     label_texts = {}
-                    for item in data['output']:
+                    for item in all_data:
                         label = item['label']
                         text = item['text']
-
-                        # Ensure label exists before adding to dictionary
-                        if label not in label_texts:
-                            label_texts[label] = []
-                        label_texts[label].append(text)
-
-                    # Order of columns as requested
-                    column_order = [
-                        'RECEIPTNUMBER', 'MERCHANTNAME', 'MERCHANTADDRESS',
-                        'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
-                        'PRICE', 'TOTAL', 'VATTAX'
-                    ]
+                        label_texts[label] = text
 
                     # Writing data to CSV file with ordered columns
                     csv_file_path = os.path.join(output_folder_path, os.path.splitext(filename)[0] + '.csv')
                     with open(csv_file_path, 'w', newline='') as csvfile:
                         csv_writer = csv.DictWriter(csvfile, fieldnames=column_order, delimiter=",")
-                        csv_writer.writeheader()
+                        if os.path.getsize(csv_file_path) == 0:
+                            csv_writer.writeheader()
 
-                        # Iterate through items and prices
-                        max_length = max(len(label_texts.get('ITEMS', [])), len(label_texts.get('PRICE', [])))
+                        # Constructing rows for the CSV file
+                        items = label_texts.get('ITEMS', '').split()
+                        prices = label_texts.get('PRICE', '').split()
+                        max_length = max(len(items), len(prices))
                         for i in range(max_length):
-                            # Use get() with default '' to avoid KeyError
-                            items = label_texts.get('ITEMS', [])[i] if i < len(label_texts.get('ITEMS', [])) else ''
-                            prices = label_texts.get('PRICE', [])[i] if i < len(label_texts.get('PRICE', [])) else ''
-
+                            row_data = {}
+                            for label in column_order:
+                                # Use get() with default '' to handle missing labels gracefully
+                                row_data[label] = label_texts.get(label, '')
                             # Check if items and prices are separated by space
-                            if ' ' in items or ' ' in prices:
-                                item_list = items.split() if items else []
-                                price_list = prices.split() if prices else []
-
-                                # Create new rows for each combination of items and prices
-                                for item, price in zip(item_list, price_list):
-                                    row_data = {label: replace_symbols_with_period(label_texts[label][i]) if label == 'ITEMS' else replace_symbols_with_period(label_texts[label][i]) for label in column_order}
-                                    row_data['ITEMS'] = item
-                                    row_data['PRICE'] = price
-                                    csv_writer.writerow(row_data)
-                            else:
-                                # Use get() with default '' to avoid KeyError
-                                row_data = {label: replace_symbols_with_period(label_texts.get(label, [])[i]) if i < len(label_texts.get(label, [])) else '' for label in column_order}
-                                csv_writer.writerow(row_data)
+                            if i < len(items) and i < len(prices):
+                                row_data['ITEMS'] = items[i]
+                                row_data['PRICE'] = prices[i]
+                            csv_writer.writerow(row_data)
 
         # Combining contents of CSV files into a single CSV file
-        output_file_path = r"inferenced/output.csv"
+        output_file_path = r"static/temp/inferenced/output.csv"
         with open(output_file_path, 'w', newline='') as combined_csvfile:
             combined_csv_writer = csv.DictWriter(combined_csvfile, fieldnames=column_order, delimiter=",")
             combined_csv_writer.writeheader()
@@ -324,17 +308,30 @@ def create_csv():
     except Exception as e:
         print(f"An error occurred in create_csv: {str(e)}")
         return None
+
+    except FileNotFoundError as e:
+        print(f"File not found error: {str(e)}")
+        return jsonify({'error': 'File not found.'}), 404
+    except json.JSONDecodeError as e:
+        print(f"JSON decoding error: {str(e)}")
+        return jsonify({'error': 'JSON decoding error.'}), 500
+    except csv.Error as e:
+        print(f"CSV error: {str(e)}")
+        return jsonify({'error': 'CSV error.'}), 500
+    except Exception as e:
+        print(f"An unexpected error occurred: {str(e)}")
+        return jsonify({'error': 'An unexpected error occurred.'}), 500
 
 @app.route('/get_data')
 def get_data():
-    return send_from_directory('inferenced','output.csv', as_attachment=False)
+    return send_from_directory('static/temp/inferenced','output.csv', as_attachment=False)
 
 from flask import jsonify
 
 @app.route('/download_csv', methods=['GET'])
 def download_csv():
     try:
-        output_file_path = r"inferenced/output.csv" # path to output CSV file
+        output_file_path = r"static/temp/inferenced/output.csv" # path to output CSV file
         # Check if the file exists
         if os.path.exists(output_file_path):
             return send_file(output_file_path, as_attachment=True, download_name='output.csv')
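For context on the create_csv fix, here is a minimal, self-contained sketch of the new row-building behaviour, assuming a hypothetical labeled output for one receipt (sample_output below is illustrative data, not taken from the repository):

    import csv
    import io

    # Hypothetical labeled output for one receipt, mimicking data['output'] in the JSON files.
    sample_output = [
        {'label': 'MERCHANTNAME', 'text': '7-Eleven'},
        {'label': 'TRANSACTIONDATE', 'text': '2023-12-16'},
        {'label': 'ITEMS', 'text': 'KoreanBun ChocVron'},   # space-separated item names
        {'label': 'PRICE', 'text': '55.00 19.00'},          # space-separated prices
        {'label': 'TOTAL', 'text': '74.00'},
    ]

    column_order = [
        'RECEIPTNUMBER', 'MERCHANTNAME', 'MERCHANTADDRESS',
        'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
        'PRICE', 'TOTAL', 'VATTAX'
    ]

    # One text value per label, as in the reworked create_csv.
    label_texts = {item['label']: item['text'] for item in sample_output}

    items = label_texts.get('ITEMS', '').split()
    prices = label_texts.get('PRICE', '').split()
    max_length = max(len(items), len(prices))

    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=column_order, delimiter=",")
    writer.writeheader()

    for i in range(max_length):
        # Repeat the receipt-level fields on every row, defaulting missing labels to ''.
        row_data = {label: label_texts.get(label, '') for label in column_order}
        if i < len(items) and i < len(prices):
            row_data['ITEMS'] = items[i]    # one item per row
            row_data['PRICE'] = prices[i]   # paired with the price at the same position
        writer.writerow(row_data)

    print(buffer.getvalue())

Each space-separated item is paired with the price at the same position and written as its own row, with the remaining receipt fields repeated; this mirrors the writerow loop added to create_csv in this commit.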
experiment.ipynb DELETED
@@ -1,1337 +0,0 @@
- (entire notebook removed; its 1,337 lines of notebook JSON are condensed below)
-
- # Cell 1: defining inference parameters
- model_path = r"C:\Users\Ayoo\Desktop\webapp\model" # path to Layoutlmv3 model
- imag_path = r"C:\Users\Ayoo\Desktop\webapp\predictions\imgs" # images folder
-
- # Cell 2: run the inference script
- ! python predictions\inference\run_inference.py --model_path {model_path} --images_path {imag_path}
- # Captured output: TensorFlow/oneDNN notices, a transformers "device" deprecation warning, then the
- # pipeline stages from "Preparing for Inference" and "Preprocessing" through "Annotating Images".
-
- # Cell 3: build a keras-ocr pipeline (interrupted)
- import keras_ocr
- pipeline = keras_ocr.pipeline.Pipeline()
- # Captured output: "Looking for C:\Users\Ayoo\.keras-ocr\craft_mlt_25k.h5", followed by a
- # KeyboardInterrupt traceback raised while downloading and verifying the CRAFT detector weights.
-
- # Final visible cell: keras-ocr recognition output for a sample 7-Eleven receipt, a long list of
- # (word, 4x2 bounding-box array) tuples; truncated in this view.
1082
- " array([[ 453.71777, 2884.122 ],\n",
1083
- " [ 548.9033 , 2884.122 ],\n",
1084
- " [ 548.9033 , 2928.542 ],\n",
1085
- " [ 453.71777, 2928.542 ]], dtype=float32)),\n",
1086
- " ('fper',\n",
1087
- " array([[ 577.459 , 2884.122 ],\n",
1088
- " [ 647.2617, 2884.122 ],\n",
1089
- " [ 647.2617, 2928.542 ],\n",
1090
- " [ 577.459 , 2928.542 ]], dtype=float32)),\n",
1091
- " ('dii',\n",
1092
- " array([[ 31.728516, 2934.8877 ],\n",
1093
- " [ 101.53125 , 2934.8877 ],\n",
1094
- " [ 101.53125 , 2979.3076 ],\n",
1095
- " [ 31.728516, 2979.3076 ]], dtype=float32)),\n",
1096
- " ('fair',\n",
1097
- " array([[ 117.39551, 2934.8877 ],\n",
1098
- " [ 209.4082 , 2934.8877 ],\n",
1099
- " [ 209.4082 , 2979.3076 ],\n",
1100
- " [ 117.39551, 2979.3076 ]], dtype=float32)),\n",
1101
- " ('trade',\n",
1102
- " array([[ 222.09961, 2934.8877 ],\n",
1103
- " [ 333.1494 , 2934.8877 ],\n",
1104
- " [ 333.1494 , 2979.3076 ],\n",
1105
- " [ 222.09961, 2979.3076 ]], dtype=float32)),\n",
1106
- " ('permt',\n",
1107
- " array([[ 346.57706, 2933.5906 ],\n",
1108
- " [ 458.4858 , 2939.4805 ],\n",
1109
- " [ 456.2683 , 2981.6128 ],\n",
1110
- " [ 344.35956, 2975.7231 ]], dtype=float32)),\n",
1111
- " ('nunbers',\n",
1112
- " array([[ 494.96484, 2934.8877 ],\n",
1113
- " [ 644.08887, 2934.8877 ],\n",
1114
- " [ 644.08887, 2979.3076 ],\n",
1115
- " [ 494.96484, 2979.3076 ]], dtype=float32)),\n",
1116
- " ('t',\n",
1117
- " array([[ 453.71777, 2941.2334 ],\n",
1118
- " [ 475.92773, 2941.2334 ],\n",
1119
- " [ 475.92773, 2976.1348 ],\n",
1120
- " [ 453.71777, 2976.1348 ]], dtype=float32)),\n",
1121
- " ('18015',\n",
1122
- " array([[ 117.39551, 2985.6533 ],\n",
1123
- " [ 247.48242, 2985.6533 ],\n",
1124
- " [ 247.48242, 3030.0732 ],\n",
1125
- " [ 117.39551, 3030.0732 ]], dtype=float32)),\n",
1126
- " ('series',\n",
1127
- " array([[ 263.34668, 2985.6533 ],\n",
1128
- " [ 396.60645, 2985.6533 ],\n",
1129
- " [ 396.60645, 3030.0732 ],\n",
1130
- " [ 263.34668, 3030.0732 ]], dtype=float32)),\n",
1131
- " ('of',\n",
1132
- " array([[ 409.29785, 2985.6533 ],\n",
1133
- " [ 456.89062, 2985.6533 ],\n",
1134
- " [ 456.89062, 3030.0732 ],\n",
1135
- " [ 409.29785, 3030.0732 ]], dtype=float32)),\n",
1136
- " ('edz5',\n",
1137
- " array([[ 472.75488, 2985.6533 ],\n",
1138
- " [ 571.1133 , 2985.6533 ],\n",
1139
- " [ 571.1133 , 3026.9004 ],\n",
1140
- " [ 472.75488, 3026.9004 ]], dtype=float32)),\n",
1141
- " ('facebooks',\n",
1142
- " array([[ 53.938477, 3036.419 ],\n",
1143
- " [ 234.79102 , 3036.419 ],\n",
1144
- " [ 234.79102 , 3080.8389 ],\n",
1145
- " [ 53.938477, 3080.8389 ]], dtype=float32)),\n",
1146
- " ('71',\n",
1147
- " array([[ 329.97656, 3036.419 ],\n",
1148
- " [ 371.22363, 3036.419 ],\n",
1149
- " [ 371.22363, 3077.666 ],\n",
1150
- " [ 329.97656, 3077.666 ]], dtype=float32)),\n",
1151
- " ('iphi',\n",
1152
- " array([[ 371.22363, 3036.419 ],\n",
1153
- " [ 456.89062, 3036.419 ],\n",
1154
- " [ 456.89062, 3080.8389 ],\n",
1155
- " [ 371.22363, 3080.8389 ]], dtype=float32)),\n",
1156
- " ('comf',\n",
1157
- " array([[ 241.13672, 3039.5918 ],\n",
1158
- " [ 333.1494 , 3039.5918 ],\n",
1159
- " [ 333.1494 , 3080.8389 ],\n",
1160
- " [ 241.13672, 3080.8389 ]], dtype=float32)),\n",
1161
- " ('l',\n",
1162
- " array([[ 456.89062, 3042.7646 ],\n",
1163
- " [ 469.58203, 3042.7646 ],\n",
1164
- " [ 469.58203, 3071.3203 ],\n",
1165
- " [ 456.89062, 3071.3203 ]], dtype=float32)),\n",
1166
- " ('pp',\n",
1167
- " array([[ 491.792 , 3039.5918],\n",
1168
- " [ 542.5576, 3039.5918],\n",
1169
- " [ 542.5576, 3080.8389],\n",
1170
- " [ 491.792 , 3080.8389]], dtype=float32)),\n",
1171
- " ('fes',\n",
1172
- " array([[ 552.0762 , 3039.5918 ],\n",
1173
- " [ 631.39746, 3039.5918 ],\n",
1174
- " [ 631.39746, 3077.666 ],\n",
1175
- " [ 552.0762 , 3077.666 ]], dtype=float32)),\n",
1176
- " ('this',\n",
1177
- " array([[ 53.938477, 3137.9502 ],\n",
1178
- " [ 142.77832 , 3137.9502 ],\n",
1179
- " [ 142.77832 , 3185.543 ],\n",
1180
- " [ 53.938477, 3185.543 ]], dtype=float32)),\n",
1181
- " ('is',\n",
1182
- " array([[ 158.64258, 3137.9502 ],\n",
1183
- " [ 206.23535, 3137.9502 ],\n",
1184
- " [ 206.23535, 3185.543 ],\n",
1185
- " [ 158.64258, 3185.543 ]], dtype=float32)),\n",
1186
- " ('official',\n",
1187
- " array([[ 282.3838 , 3137.9502 ],\n",
1188
- " [ 456.89062, 3137.9502 ],\n",
1189
- " [ 456.89062, 3185.543 ],\n",
1190
- " [ 282.3838 , 3185.543 ]], dtype=float32)),\n",
1191
- " ('receift',\n",
1192
- " array([[ 472.75488, 3137.9502 ],\n",
1193
- " [ 628.2246 , 3137.9502 ],\n",
1194
- " [ 628.2246 , 3185.543 ],\n",
1195
- " [ 472.75488, 3185.543 ]], dtype=float32)),\n",
1196
- " ('in',\n",
1197
- " array([[ 222.09961, 3141.123 ],\n",
1198
- " [ 269.69238, 3141.123 ],\n",
1199
- " [ 269.69238, 3182.37 ],\n",
1200
- " [ 222.09961, 3182.37 ]], dtype=float32))]]"
1201
- ]
1202
- },
1203
- "execution_count": 4,
1204
- "metadata": {},
1205
- "output_type": "execute_result"
1206
- }
1207
- ],
1208
- "source": [
1209
- "pipeline.recognize([r\"temp\\20230508_122035_preprocessed.png\"])"
1210
- ]
1211
- },
1212
- {
1213
- "cell_type": "code",
1214
- "execution_count": 5,
1215
- "metadata": {},
1216
- "outputs": [
1217
- {
1218
- "name": "stdout",
1219
- "output_type": "stream",
1220
- "text": [
1221
- "Requirement already satisfied: requests in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (2.31.0)\n",
1222
- "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (3.2.0)\n",
1223
- "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (3.4)\n",
1224
- "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (1.26.16)\n",
1225
- "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (2023.7.22)\n"
1226
- ]
1227
- }
1228
- ],
1229
- "source": [
1230
- "!pip install requests"
1231
- ]
1232
- },
1233
- {
1234
- "cell_type": "code",
1235
- "execution_count": 16,
1236
- "metadata": {},
1237
- "outputs": [
1238
- {
1239
- "name": "stdout",
1240
- "output_type": "stream",
1241
- "text": [
1242
- "{'ParsedResults': [{'TextOverlay': {'Lines': [{'LineText': '7-ELEVEN.', 'Words': [{'WordText': '7', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': '-', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': 'ELEVEN', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': '.', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}], 'MaxHeight': 84.0, 'MinTop': 38.0}, {'LineText': 'NHJ Convenience Store', 'Words': [{'WordText': 'NHJ', 'Left': 117.0, 'Top': 215.0, 'Height': 36.0, 'Width': 76.0}, {'WordText': 'Convenience', 'Left': 198.0, 'Top': 215.0, 'Height': 36.0, 'Width': 247.0}, {'WordText': 'Store', 'Left': 450.0, 'Top': 215.0, 'Height': 36.0, 'Width': 114.0}], 'MaxHeight': 36.0, 'MinTop': 215.0}, {'LineText': 'Owned & Operated by: Nancy A.', 'Words': [{'WordText': 'Owned', 'Left': 33.0, 'Top': 260.0, 'Height': 52.0, 'Width': 117.0}, {'WordText': '&', 'Left': 156.0, 'Top': 261.0, 'Height': 52.0, 'Width': 32.0}, {'WordText': 'Operated', 'Left': 195.0, 'Top': 261.0, 'Height': 52.0, 'Width': 182.0}, {'WordText': 'by', 'Left': 384.0, 'Top': 261.0, 'Height': 52.0, 'Width': 71.0}, {'WordText': ':', 'Left': 384.0, 'Top': 261.0, 'Height': 52.0, 'Width': 71.0}, {'WordText': 'Nancy', 'Left': 462.0, 'Top': 261.0, 'Height': 52.0, 'Width': 130.0}, {'WordText': 'A', 'Left': 598.0, 'Top': 260.0, 'Height': 52.0, 'Width': 47.0}, {'WordText': '.', 'Left': 598.0, 'Top': 260.0, 'Height': 52.0, 'Width': 47.0}], 'MaxHeight': 52.0, 'MinTop': 260.0}, {'LineText': 'Climacosa', 'Words': [{'WordText': 'Climacosa', 'Left': 244.0, 'Top': 315.0, 'Height': 38.0, 'Width': 193.0}], 'MaxHeight': 38.0, 'MinTop': 315.0}, {'LineText': 'VATREGTIN #933-598-685-002', 'Words': [{'WordText': 'VATREGTIN', 'Left': 75.0, 'Top': 361.0, 'Height': 43.0, 'Width': 204.0}, {'WordText': '#', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '933', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '598', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '685', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '002', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}], 'MaxHeight': 43.0, 'MinTop': 361.0}, {'LineText': 'Poblacion, Leon, Iloilo,', 'Words': [{'WordText': 'Poblacion', 'Left': 94.0, 'Top': 417.0, 'Height': 49.0, 'Width': 220.0}, {'WordText': ',', 'Left': 94.0, 'Top': 417.0, 'Height': 49.0, 'Width': 220.0}, {'WordText': 'Leon', 'Left': 321.0, 'Top': 417.0, 'Height': 49.0, 'Width': 122.0}, {'WordText': ',', 'Left': 321.0, 'Top': 417.0, 'Height': 49.0, 'Width': 122.0}, {'WordText': 'Iloilo', 'Left': 449.0, 'Top': 417.0, 'Height': 49.0, 'Width': 154.0}, {'WordText': ',', 'Left': 449.0, 'Top': 417.0, 'Height': 49.0, 'Width': 154.0}], 'MaxHeight': 49.0, 'MinTop': 417.0}, {'LineText': 'Philippines', 'Words': [{'WordText': 'Philippines', 'Left': 225.0, 'Top': 468.0, 'Height': 44.0, 'Width': 238.0}], 'MaxHeight': 44.0, 'MinTop': 468.0}, {'LineText': 'lel #: NULL', 'Words': [{'WordText': 'lel', 'Left': 221.0, 'Top': 524.0, 'Height': 40.0, 'Width': 78.0}, {'WordText': '#:', 'Left': 304.0, 'Top': 524.0, 'Height': 39.0, 'Width': 59.0}, {'WordText': 'NULL', 'Left': 368.0, 'Top': 523.0, 'Height': 40.0, 'Width': 89.0}], 
'MaxHeight': 41.0, 'MinTop': 523.0}, {'LineText': '05/01/2023 (Mon) 23:00:57', 'Words': [{'WordText': '05', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '/', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 215.0}, {'WordText': '01', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '/', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 215.0}, {'WordText': '2023', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '(', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': 'Mon', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': ')', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': '23', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': ':', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': '00', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': ':', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': '57', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}], 'MaxHeight': 42.0, 'MinTop': 622.0}, {'LineText': 'RCPT #2481347', 'Words': [{'WordText': 'RCPT', 'Left': 13.0, 'Top': 723.0, 'Height': 42.0, 'Width': 94.0}, {'WordText': '#', 'Left': 113.0, 'Top': 723.0, 'Height': 42.0, 'Width': 184.0}, {'WordText': '2481347', 'Left': 113.0, 'Top': 723.0, 'Height': 42.0, 'Width': 184.0}], 'MaxHeight': 42.0, 'MinTop': 723.0}, {'LineText': 'ROPT CNTHO', 'Words': [{'WordText': 'ROPT', 'Left': 472.0, 'Top': 723.0, 'Height': 49.0, 'Width': 96.0}, {'WordText': 'CNTHO', 'Left': 574.0, 'Top': 722.0, 'Height': 49.0, 'Width': 120.0}], 'MaxHeight': 50.0, 'MinTop': 722.0}, {'LineText': 'STORE#3058', 'Words': [{'WordText': 'STORE', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}, {'WordText': '#', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}, {'WordText': '3058', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}], 'MaxHeight': 47.0, 'MinTop': 771.0}, {'LineText': 'SN# :XTI43170', 'Words': [{'WordText': 'SN', 'Left': 433.0, 'Top': 771.0, 'Height': 49.0, 'Width': 66.0}, {'WordText': '#', 'Left': 433.0, 'Top': 771.0, 'Height': 49.0, 'Width': 66.0}, {'WordText': ':', 'Left': 505.0, 'Top': 771.0, 'Height': 50.0, 'Width': 189.0}, {'WordText': 'XTI43170', 'Left': 505.0, 'Top': 771.0, 'Height': 50.0, 'Width': 189.0}], 'MaxHeight': 50.0, 'MinTop': 771.0}, {'LineText': 'MIN #: 18112011091411051', 'Words': [{'WordText': 'MIN', 'Left': 13.0, 'Top': 830.0, 'Height': 39.0, 'Width': 73.0}, {'WordText': '#:', 'Left': 91.0, 'Top': 830.0, 'Height': 39.0, 'Width': 58.0}, {'WordText': '18112011091411051', 'Left': 154.0, 'Top': 830.0, 'Height': 39.0, 'Width': 360.0}], 'MaxHeight': 39.0, 'MinTop': 830.0}, {'LineText': 'STAFF: Angelica Duante', 'Words': [{'WordText': 'STAFF', 'Left': 13.0, 'Top': 879.0, 'Height': 43.0, 'Width': 124.0}, {'WordText': ':', 'Left': 13.0, 'Top': 879.0, 'Height': 43.0, 'Width': 124.0}, {'WordText': 'Angelica', 'Left': 142.0, 'Top': 879.0, 'Height': 43.0, 'Width': 177.0}, {'WordText': 'Duante', 'Left': 325.0, 'Top': 879.0, 'Height': 43.0, 'Width': 138.0}], 'MaxHeight': 43.0, 'MinTop': 879.0}, {'LineText': '7FKoreanßun', 'Words': [{'WordText': '7FKoreanßun', 'Left': 16.0, 'Top': 979.0, 'Height': 45.0, 'Width': 235.0}], 'MaxHeight': 45.0, 'MinTop': 979.0}, {'LineText': 'NissinYaSaBeet77g', 'Words': [{'WordText': 'NissinYaSaBeet77g', 'Left': 13.0, 'Top': 1032.0, 'Height': 42.0, 'Width': 365.0}], 'MaxHeight': 42.0, 
'MinTop': 1032.0}, {'LineText': 'BBHOTDOGCREMYCHEES', 'Words': [{'WordText': 'BBHOTDOGCREMYCHEES', 'Left': 13.0, 'Top': 1084.0, 'Height': 39.0, 'Width': 384.0}], 'MaxHeight': 39.0, 'MinTop': 1084.0}, {'LineText': '39.00 Х 6', 'Words': [{'WordText': '39.00', 'Left': 140.0, 'Top': 1136.0, 'Height': 43.0, 'Width': 116.0}, {'WordText': 'Х', 'Left': 261.0, 'Top': 1136.0, 'Height': 43.0, 'Width': 100.0}, {'WordText': '6', 'Left': 366.0, 'Top': 1135.0, 'Height': 42.0, 'Width': 29.0}], 'MaxHeight': 44.0, 'MinTop': 1135.0}, {'LineText': 'chocvron? in1Ch020g', 'Words': [{'WordText': 'chocvron', 'Left': 13.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 193.0}, {'WordText': '?', 'Left': 13.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 193.0}, {'WordText': 'in1Ch020g', 'Left': 212.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 186.0}], 'MaxHeight': 43.0, 'MinTop': 1185.0}, {'LineText': '15.00 X', 'Words': [{'WordText': '15.00', 'Left': 140.0, 'Top': 1240.0, 'Height': 43.0, 'Width': 116.0}, {'WordText': 'X', 'Left': 261.0, 'Top': 1240.0, 'Height': 42.0, 'Width': 42.0}], 'MaxHeight': 43.0, 'MinTop': 1240.0}, {'LineText': '2', 'Words': [{'WordText': '2', 'Left': 355.0, 'Top': 1240.0, 'Height': 39.0, 'Width': 39.0}], 'MaxHeight': 39.0, 'MinTop': 1240.0}, {'LineText': '55.004', 'Words': [{'WordText': '55.004', 'Left': 557.0, 'Top': 979.0, 'Height': 47.0, 'Width': 137.0}], 'MaxHeight': 47.0, 'MinTop': 979.0}, {'LineText': '40.000', 'Words': [{'WordText': '40.000', 'Left': 560.0, 'Top': 1031.0, 'Height': 48.0, 'Width': 134.0}], 'MaxHeight': 48.0, 'MinTop': 1031.0}, {'LineText': '234.000', 'Words': [{'WordText': '234.000', 'Left': 534.0, 'Top': 1135.0, 'Height': 47.0, 'Width': 160.0}], 'MaxHeight': 47.0, 'MinTop': 1135.0}, {'LineText': '30.000', 'Words': [{'WordText': '30.000', 'Left': 557.0, 'Top': 1237.0, 'Height': 46.0, 'Width': 137.0}], 'MaxHeight': 46.0, 'MinTop': 1237.0}, {'LineText': 'Total (10)', 'Words': [{'WordText': 'Total', 'Left': 13.0, 'Top': 1340.0, 'Height': 44.0, 'Width': 121.0}, {'WordText': '(', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}, {'WordText': '10', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}, {'WordText': ')', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}], 'MaxHeight': 46.0, 'MinTop': 1340.0}, {'LineText': 'CASH', 'Words': [{'WordText': 'CASH', 'Left': 55.0, 'Top': 1390.0, 'Height': 43.0, 'Width': 91.0}], 'MaxHeight': 43.0, 'MinTop': 1390.0}, {'LineText': 'CHANGE', 'Words': [{'WordText': 'CHANGE', 'Left': 52.0, 'Top': 1442.0, 'Height': 43.0, 'Width': 137.0}], 'MaxHeight': 43.0, 'MinTop': 1442.0}, {'LineText': '359.00', 'Words': [{'WordText': '359.00', 'Left': 557.0, 'Top': 1341.0, 'Height': 47.0, 'Width': 137.0}], 'MaxHeight': 47.0, 'MinTop': 1341.0}, {'LineText': '1000.00', 'Words': [{'WordText': '1000.00', 'Left': 537.0, 'Top': 1389.0, 'Height': 48.0, 'Width': 154.0}], 'MaxHeight': 48.0, 'MinTop': 1389.0}, {'LineText': '641.00', 'Words': [{'WordText': '641.00', 'Left': 557.0, 'Top': 1442.0, 'Height': 46.0, 'Width': 134.0}], 'MaxHeight': 46.0, 'MinTop': 1442.0}, {'LineText': 'VATable', 'Words': [{'WordText': 'VATable', 'Left': 52.0, 'Top': 1546.0, 'Height': 40.0, 'Width': 157.0}], 'MaxHeight': 40.0, 'MinTop': 1546.0}, {'LineText': 'VAT_Tax', 'Words': [{'WordText': 'VAT_Tax', 'Left': 52.0, 'Top': 1598.0, 'Height': 50.0, 'Width': 157.0}], 'MaxHeight': 50.0, 'MinTop': 1598.0}, {'LineText': 'Zero_Rated', 'Words': [{'WordText': 'Zero_Rated', 'Left': 52.0, 'Top': 1649.0, 'Height': 48.0, 'Width': 219.0}], 'MaxHeight': 48.0, 'MinTop': 1649.0}, 
{'LineText': 'VAT_Exempted', 'Words': [{'WordText': 'VAT_Exempted', 'Left': 52.0, 'Top': 1699.0, 'Height': 50.0, 'Width': 264.0}], 'MaxHeight': 50.0, 'MinTop': 1699.0}, {'LineText': '320.54', 'Words': [{'WordText': '320.54', 'Left': 557.0, 'Top': 1546.0, 'Height': 46.0, 'Width': 134.0}], 'MaxHeight': 46.0, 'MinTop': 1546.0}, {'LineText': '38.46', 'Words': [{'WordText': '38.46', 'Left': 577.0, 'Top': 1598.0, 'Height': 43.0, 'Width': 114.0}], 'MaxHeight': 43.0, 'MinTop': 1598.0}, {'LineText': '0.00', 'Words': [{'WordText': '0.00', 'Left': 600.0, 'Top': 1651.0, 'Height': 42.0, 'Width': 91.0}], 'MaxHeight': 42.0, 'MinTop': 1651.0}, {'LineText': '0.00', 'Words': [{'WordText': '0.00', 'Left': 599.0, 'Top': 1702.0, 'Height': 43.0, 'Width': 95.0}], 'MaxHeight': 43.0, 'MinTop': 1702.0}, {'LineText': 'Sold To: 9906087698684', 'Words': [{'WordText': 'Sold', 'Left': 13.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 94.0}, {'WordText': 'To', 'Left': 113.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 79.0}, {'WordText': ':', 'Left': 113.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 79.0}, {'WordText': '9906087698684', 'Left': 197.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 285.0}], 'MaxHeight': 42.0, 'MinTop': 1803.0}, {'LineText': 'Name:', 'Words': [{'WordText': 'Name', 'Left': 10.0, 'Top': 1856.0, 'Height': 39.0, 'Width': 111.0}, {'WordText': ':', 'Left': 10.0, 'Top': 1856.0, 'Height': 39.0, 'Width': 111.0}], 'MaxHeight': 39.0, 'MinTop': 1856.0}, {'LineText': 'Address:', 'Words': [{'WordText': 'Address', 'Left': 13.0, 'Top': 1907.0, 'Height': 40.0, 'Width': 170.0}, {'WordText': ':', 'Left': 13.0, 'Top': 1907.0, 'Height': 40.0, 'Width': 170.0}], 'MaxHeight': 40.0, 'MinTop': 1907.0}, {'LineText': 'TIN:', 'Words': [{'WordText': 'TIN', 'Left': 13.0, 'Top': 1957.0, 'Height': 39.0, 'Width': 85.0}, {'WordText': ':', 'Left': 13.0, 'Top': 1957.0, 'Height': 39.0, 'Width': 85.0}], 'MaxHeight': 39.0, 'MinTop': 1957.0}, {'LineText': 'Philippine Seven Corporation', 'Words': [{'WordText': 'Philippine', 'Left': 10.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 226.0}, {'WordText': 'Seven', 'Left': 241.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 118.0}, {'WordText': 'Corporation', 'Left': 365.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 241.0}], 'MaxHeight': 43.0, 'MinTop': 2060.0}, {'LineText': '7th Floor The Columbia Tower', 'Words': [{'WordText': '7th', 'Left': 13.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 72.0}, {'WordText': 'Floor', 'Left': 90.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 126.0}, {'WordText': 'The', 'Left': 220.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 76.0}, {'WordText': 'Columbia', 'Left': 301.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 189.0}, {'WordText': 'Tower', 'Left': 495.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 108.0}], 'MaxHeight': 36.0, 'MinTop': 2116.0}, {'LineText': 'Ortigas Avenue, Mandaluyong', 'Words': [{'WordText': 'Ortigas', 'Left': 33.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 154.0}, {'WordText': 'Avenue', 'Left': 192.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 159.0}, {'WordText': ',', 'Left': 192.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 159.0}, {'WordText': 'Mandaluyong', 'Left': 358.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 248.0}], 'MaxHeight': 49.0, 'MinTop': 2161.0}, {'LineText': 'City', 'Words': [{'WordText': 'City', 'Left': 29.0, 'Top': 2214.0, 'Height': 42.0, 'Width': 94.0}], 'MaxHeight': 42.0, 'MinTop': 2214.0}, {'LineText': 'TIN: 000-390-189-000', 'Words': [{'WordText': 'TIN', 'Left': 13.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 90.0}, {'WordText': ':', 'Left': 13.0, 'Top': 2266.0, 
'Height': 46.0, 'Width': 90.0}, {'WordText': '000', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '390', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '189', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '000', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}], 'MaxHeight': 46.0, 'MinTop': 2266.0}, {'LineText': 'BIR ACCI #', 'Words': [{'WordText': 'BIR', 'Left': 10.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'ACCI', 'Left': 93.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 98.0}, {'WordText': '#', 'Left': 195.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 30.0}], 'MaxHeight': 40.0, 'MinTop': 2318.0}, {'LineText': '116-000390189-000346 19602', 'Words': [{'WordText': '116', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '000390189', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '000346', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '19602', 'Left': 468.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 118.0}], 'MaxHeight': 43.0, 'MinTop': 2366.0}, {'LineText': 'AcciDate: 08/01/2020', 'Words': [{'WordText': 'AcciDate', 'Left': 13.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 194.0}, {'WordText': ':', 'Left': 13.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 194.0}, {'WordText': '08', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '/', 'Left': 213.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '01', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '/', 'Left': 213.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '2020', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}], 'MaxHeight': 42.0, 'MinTop': 2419.0}, {'LineText': '07/31/2025', 'Words': [{'WordText': '07', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '/', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '31', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '/', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '2025', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}], 'MaxHeight': 44.0, 'MinTop': 2470.0}, {'LineText': 'Permit #:', 'Words': [{'WordText': 'Permit', 'Left': 10.0, 'Top': 2526.0, 'Height': 40.0, 'Width': 142.0}, {'WordText': '#:', 'Left': 156.0, 'Top': 2527.0, 'Height': 39.0, 'Width': 46.0}], 'MaxHeight': 40.0, 'MinTop': 2526.0}, {'LineText': 'FP112018-074-0194656-00002', 'Words': [{'WordText': 'FP112018', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '074', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '0194656', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '00002', 'Left': 33.0, 'Top': 2572.0, 'Height': 
39.0, 'Width': 554.0}], 'MaxHeight': 39.0, 'MinTop': 2572.0}, {'LineText': 'Get a chance to win a trip for', 'Words': [{'WordText': 'Get', 'Left': 29.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'a', 'Left': 112.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'chance', 'Left': 151.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 146.0}, {'WordText': 'to', 'Left': 302.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 54.0}, {'WordText': 'win', 'Left': 361.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 83.0}, {'WordText': 'a', 'Left': 448.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'trip', 'Left': 487.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 107.0}, {'WordText': 'for', 'Left': 599.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 69.0}], 'MaxHeight': 39.0, 'MinTop': 2679.0}, {'LineText': '2 to Korea when you buy PISO', 'Words': [{'WordText': '2', 'Left': 52.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'to', 'Left': 91.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 58.0}, {'WordText': 'Korea', 'Left': 154.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 122.0}, {'WordText': 'when', 'Left': 281.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 102.0}, {'WordText': 'you', 'Left': 388.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'buy', 'Left': 471.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'PISO', 'Left': 554.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 91.0}], 'MaxHeight': 39.0, 'MinTop': 2731.0}, {'LineText': 'worth of 7-Eleven items. Earn', 'Words': [{'WordText': 'worth', 'Left': 29.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 116.0}, {'WordText': 'of', 'Left': 152.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 61.0}, {'WordText': '7', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': '-', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': 'Eleven', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': 'items', 'Left': 409.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 135.0}, {'WordText': '.', 'Left': 409.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 135.0}, {'WordText': 'Earn', 'Left': 550.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 99.0}], 'MaxHeight': 49.0, 'MinTop': 2783.0}, {'LineText': '3 eRaffle entries when you buy', 'Words': [{'WordText': '3', 'Left': 29.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'eRaffle', 'Left': 68.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 166.0}, {'WordText': 'entries', 'Left': 239.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 161.0}, {'WordText': 'when', 'Left': 404.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 102.0}, {'WordText': 'you', 'Left': 512.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'buy', 'Left': 594.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 70.0}], 'MaxHeight': 39.0, 'MinTop': 2832.0}, {'LineText': 'discounted booster Items. 
Per', 'Words': [{'WordText': 'discounted', 'Left': 33.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 221.0}, {'WordText': 'booster', 'Left': 259.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 168.0}, {'WordText': 'Items', 'Left': 432.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 137.0}, {'WordText': '.', 'Left': 432.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 137.0}, {'WordText': 'Per', 'Left': 574.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 68.0}], 'MaxHeight': 42.0, 'MinTop': 2888.0}, {'LineText': 'DTI FAIR TRADE Permit Number:', 'Words': [{'WordText': 'DTI', 'Left': 29.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 78.0}, {'WordText': 'FAIR', 'Left': 113.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 100.0}, {'WordText': 'TRADE', 'Left': 218.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 121.0}, {'WordText': 'Permit', 'Left': 344.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 142.0}, {'WordText': 'Number', 'Left': 491.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 151.0}, {'WordText': ':', 'Left': 491.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 151.0}], 'MaxHeight': 42.0, 'MinTop': 2933.0}, {'LineText': '163019 Series of 2023..', 'Words': [{'WordText': '163019', 'Left': 117.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 135.0}, {'WordText': 'Series', 'Left': 257.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 145.0}, {'WordText': 'of', 'Left': 407.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 55.0}, {'WordText': '2023', 'Left': 467.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 165.0}, {'WordText': '..', 'Left': 467.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 165.0}], 'MaxHeight': 40.0, 'MinTop': 2988.0}, {'LineText': 'facebook.com/711philippines.', 'Words': [{'WordText': 'facebook.com', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '/', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '711philippines', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '.', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}], 'MaxHeight': 42.0, 'MinTop': 3037.0}, {'LineText': '- THIS IS AN OFFICIAL RECEIPT -', 'Words': [{'WordText': '-', 'Left': 0.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 46.0}, {'WordText': 'THIS', 'Left': 52.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 98.0}, {'WordText': 'IS', 'Left': 155.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 57.0}, {'WordText': 'AN', 'Left': 219.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 52.0}, {'WordText': 'OFFICIAL', 'Left': 276.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 184.0}, {'WordText': 'RECEIPT', 'Left': 466.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 167.0}, {'WordText': '-', 'Left': 638.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 30.0}], 'MaxHeight': 46.0, 'MinTop': 3138.0}], 'HasOverlay': True}, 'TextOrientation': '0', 'FileParseExitCode': 1, 'ParsedText': '7-ELEVEN.\\nNHJ Convenience Store\\nOwned & Operated by: Nancy A.\\nClimacosa\\nVATREGTIN #933-598-685-002\\nPoblacion, Leon, Iloilo,\\nPhilippines\\nlel #: NULL\\n05/01/2023 (Mon) 23:00:57\\nRCPT #2481347\\nROPT CNTHO\\nSTORE#3058\\nSN# :XTI43170\\nMIN #: 18112011091411051\\nSTAFF: Angelica Duante\\n7FKoreanßun\\nNissinYaSaBeet77g\\nBBHOTDOGCREMYCHEES\\n39.00 Х 6\\nchocvron? 
in1Ch020g\\n15.00 X\\n2\\n55.004\\n40.000\\n234.000\\n30.000\\nTotal (10)\\nCASH\\nCHANGE\\n359.00\\n1000.00\\n641.00\\nVATable\\nVAT_Tax\\nZero_Rated\\nVAT_Exempted\\n320.54\\n38.46\\n0.00\\n0.00\\nSold To: 9906087698684\\nName:\\nAddress:\\nTIN:\\nPhilippine Seven Corporation\\n7th Floor The Columbia Tower\\nOrtigas Avenue, Mandaluyong\\nCity\\nTIN: 000-390-189-000\\nBIR ACCI #\\n116-000390189-000346 19602\\nAcciDate: 08/01/2020\\n07/31/2025\\nPermit #:\\nFP112018-074-0194656-00002\\nGet a chance to win a trip for\\n2 to Korea when you buy PISO\\nworth of 7-Eleven items. Earn\\n3 eRaffle entries when you buy\\ndiscounted booster Items. Per\\nDTI FAIR TRADE Permit Number:\\n163019 Series of 2023..\\nfacebook.com/711philippines.\\n- THIS IS AN OFFICIAL RECEIPT -', 'ErrorMessage': '', 'ErrorDetails': ''}], 'OCRExitCode': 1, 'IsErroredOnProcessing': False, 'ProcessingTimeInMilliseconds': '2593', 'SearchablePDFURL': 'Searchable PDF not generated as it was not requested.'}\n"
1243
- ]
1244
- }
1245
- ],
1246
- "source": [
1247
- "# Import requests library\n",
1248
- "import requests\n",
1249
- "\n",
1250
- "# Define the OCR API endpoint\n",
1251
- "url = \"https://api.ocr.space/parse/image\"\n",
1252
- "\n",
1253
- "# Define the API key and the language\n",
1254
- "api_key = \"K88232854988957\"\n",
1255
- "language = \"eng\"\n",
1256
- "\n",
1257
- "# Define the image file path\n",
1258
- "image_file = r\"C:\\Users\\Ayoo\\Desktop\\webapp\\predictions\\imgs\\20230508_122035.jpg\"\n",
1259
- "\n",
1260
- "# Open the image file as binary\n",
1261
- "with open(image_file, \"rb\") as f:\n",
1262
- " # Define the payload for the API request\n",
1263
- " payload = {\n",
1264
- " \"apikey\": api_key,\n",
1265
- " \"language\": language,\n",
1266
- " \"isOverlayRequired\": True, # Optional, set to True if you want the coordinates of the words\n",
1267
- " \"OCREngine\": 2 # OCR Engine 2 for Layoutlmv3\n",
1268
- " }\n",
1269
- " # Define the file parameter for the API request\n",
1270
- " file = {\n",
1271
- " \"file\": f\n",
1272
- " }\n",
1273
- " # Send the POST request to the OCR API\n",
1274
- " response = requests.post(url, data=payload, files=file)\n",
1275
- "\n",
1276
- "# Check the status code of the response\n",
1277
- "if response.status_code == 200:\n",
1278
- " # Parse the JSON response\n",
1279
- " result = response.json()\n",
1280
- " # Print the parsed text\n",
1281
- " print(result)\n",
1282
- "else:\n",
1283
- " # Print the error message\n",
1284
- " print(\"Error: \" + response.text)\n"
1285
- ]
1286
- },
1287
- {
1288
- "cell_type": "code",
1289
- "execution_count": 13,
1290
- "metadata": {},
1291
- "outputs": [
1292
- {
1293
- "ename": "TypeError",
1294
- "evalue": "Object of type Response is not JSON serializable",
1295
- "output_type": "error",
1296
- "traceback": [
1297
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
1298
- "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
1299
- "Cell \u001b[1;32mIn[13], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Assuming 'response' is the JSON response from the OCR API\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m)\u001b[49m)\n",
1300
- "File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\__init__.py:238\u001b[0m, in \u001b[0;36mdumps\u001b[1;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 233\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONEncoder\n\u001b[0;32m 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43mskipkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_ascii\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_ascii\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_circular\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_circular\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_nan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nan\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mseparators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseparators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msort_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m--> 238\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n",
1301
- "File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:202\u001b[0m, in \u001b[0;36mJSONEncoder.encode\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 200\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miterencode(o, _one_shot\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(chunks, (\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m)):\n\u001b[1;32m--> 202\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(chunks)\n\u001b[0;32m 203\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(chunks)\n",
1302
- "File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:439\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCircular reference detected\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 438\u001b[0m markers[markerid] \u001b[38;5;241m=\u001b[39m o\n\u001b[1;32m--> 439\u001b[0m o \u001b[38;5;241m=\u001b[39m \u001b[43m_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 440\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode(o, _current_indent_level)\n\u001b[0;32m 441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
1303
- "File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:180\u001b[0m, in \u001b[0;36mJSONEncoder.default\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdefault\u001b[39m(\u001b[38;5;28mself\u001b[39m, o):\n\u001b[0;32m 162\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Implement this method in a subclass such that it returns\u001b[39;00m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;124;03m a serializable object for ``o``, or calls the base implementation\u001b[39;00m\n\u001b[0;32m 164\u001b[0m \u001b[38;5;124;03m (to raise a ``TypeError``).\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 178\u001b[0m \n\u001b[0;32m 179\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 180\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mObject of type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mo\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mis not JSON serializable\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
1304
- "\u001b[1;31mTypeError\u001b[0m: Object of type Response is not JSON serializable"
1305
- ]
1306
- }
1307
- ],
1308
- "source": [
1309
- "import json\n",
1310
- "\n",
1311
- "# Assuming 'response' is the JSON response from the OCR API\n",
1312
- "print(json.dumps(response, indent=4))\n"
1313
- ]
1314
- }
1315
- ],
1316
- "metadata": {
1317
- "kernelspec": {
1318
- "display_name": "mlenv",
1319
- "language": "python",
1320
- "name": "python3"
1321
- },
1322
- "language_info": {
1323
- "codemirror_mode": {
1324
- "name": "ipython",
1325
- "version": 3
1326
- },
1327
- "file_extension": ".py",
1328
- "mimetype": "text/x-python",
1329
- "name": "python",
1330
- "nbconvert_exporter": "python",
1331
- "pygments_lexer": "ipython3",
1332
- "version": "3.11.5"
1333
- }
1334
- },
1335
- "nbformat": 4,
1336
- "nbformat_minor": 2
1337
- }
 
 
 
inferenced/csv_files/Output_0.csv DELETED
@@ -1,4 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,C26rnTeaLemon500ml,39.000,88.00,9.43
3
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,COBRENRGYORNK350ML,28.000,88.00,9.43
4
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,OTSHIBMPFRSPLNS50G,21.000,88.00,9.43
 
 
 
 
 
inferenced/csv_files/Output_1.csv DELETED
@@ -1,2 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- 01053710,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,08 / 12 / 2023,10 : 07,PharmtonEsentialCao,23.75,23 - 75,
 
 
 
inferenced/csv_files/Output_2.csv DELETED
@@ -1,3 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- # 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,NESTEALEMICET500ML,35.000,76.00,8.14
3
- # 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,ArlaGStrwbryT200ml,41.000,76.00,8.14
 
 
 
 
inferenced/csv_files/Output_3.csv DELETED
@@ -1,2 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- 000036410,WVSU Multi Purpose Cooperative,Luna Street Lapaz Iloilo City,10 - 25 - 2023,01 : 29 : 49 PM,COKE,13.00,13.00,1.39
 
 
 
inferenced/csv_files/Output_4.csv DELETED
@@ -1,2 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- 01053735,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,09 / 12 / 2023,11 : 07,EQDryTravelM18,3.31.00,331 - 00,35.46
 
 
 
inferenced/output.csv DELETED
@@ -1,9 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,C26rnTeaLemon500ml,39.000,88.00,9.43
3
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,COBRENRGYORNK350ML,28.000,88.00,9.43
4
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,OTSHIBMPFRSPLNS50G,21.000,88.00,9.43
5
- 01053710,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,08 / 12 / 2023,10 : 07,PharmtonEsentialCao,23.75,23 - 75,
6
- # 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,NESTEALEMICET500ML,35.000,76.00,8.14
7
- # 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,ArlaGStrwbryT200ml,41.000,76.00,8.14
8
- 000036410,WVSU Multi Purpose Cooperative,Luna Street Lapaz Iloilo City,10 - 25 - 2023,01 : 29 : 49 PM,COKE,13.00,13.00,1.39
9
- 01053735,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,09 / 12 / 2023,11 : 07,EQDryTravelM18,3.31.00,331 - 00,35.46
 
 
 
 
 
 
 
 
 
 
inferenced/sample1_711_inference.jpg DELETED
Binary file (295 kB)
 
inferenced/sample1_grace_inference.jpg DELETED
Binary file (186 kB)
 
inferenced/sample_711_inference.jpg DELETED
Binary file (298 kB)
 
inferenced/sample_coop_inference.jpg DELETED
Binary file (276 kB)
 
inferenced/sample_grace_inference.jpg DELETED
Binary file (205 kB)
 
log/error_output.log CHANGED
@@ -308,3 +308,21 @@ Traceback (most recent call last):
308
  TypeError: The view function for 'create_csv' did not return a valid response. The function either returned None or ended without a return statement.
309
  2024-02-22 10:18:01,539 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 10:18:01] "GET /create_csv HTTP/1.1" 500 -
310
  2024-02-22 10:18:02,099 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 10:18:02] "GET /get_data HTTP/1.1" 404 -
311
+ 2024-02-22 17:02:51,698 ERROR app 'NoneType' object is not iterable
312
+ 2024-02-22 17:02:51,706 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:51] "GET /run_inference HTTP/1.1" 302 -
313
+ 2024-02-22 17:02:51,754 ERROR app Exception on /create_csv [GET]
314
+ Traceback (most recent call last):
315
+ File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 2190, in wsgi_app
316
+ response = self.full_dispatch_request()
317
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
318
+ File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1487, in full_dispatch_request
319
+ return self.finalize_request(rv)
320
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
321
+ File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1506, in finalize_request
322
+ response = self.make_response(rv)
323
+ ^^^^^^^^^^^^^^^^^^^^^^
324
+ File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1801, in make_response
325
+ raise TypeError(
326
+ TypeError: The view function for 'create_csv' did not return a valid response. The function either returned None or ended without a return statement.
327
+ 2024-02-22 17:02:51,766 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:51] "GET /create_csv HTTP/1.1" 500 -
328
+ 2024-02-22 17:02:52,348 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:52] "GET /get_data HTTP/1.1" 404 -
static/inference/Layoutlmv3_inference/__init__.py DELETED
File without changes
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (176 Bytes)
 
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (195 Bytes)
 
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (180 Bytes)
 
static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-310.pyc DELETED
Binary file (2.04 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc DELETED
Binary file (3.87 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-310.pyc DELETED
Binary file (6.83 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc DELETED
Binary file (13.5 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-310.pyc DELETED
Binary file (3.51 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-311.pyc DELETED
Binary file (9.92 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-312.pyc DELETED
Binary file (5.24 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-310.pyc DELETED
Binary file (2.41 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-311.pyc DELETED
Binary file (3.84 kB)
 
static/inference/Layoutlmv3_inference/annotate_image.py DELETED
@@ -1,56 +0,0 @@
1
- import os
2
- from PIL import Image, ImageDraw, ImageFont
3
- from .utils import image_label_2_color
4
-
5
-
6
- def get_flattened_output(docs):
7
- print("Running Flattened Output")
8
- flattened_output = []
9
- annotation_key = 'output'
10
- for doc in docs:
11
- flattened_output_item = {annotation_key: []}
12
- doc_annotation = doc[annotation_key]
13
- for i, span in enumerate(doc_annotation):
14
- if len(span['words']) > 1:
15
- for span_chunk in span['words']:
16
- flattened_output_item[annotation_key].append(
17
- {
18
- 'label': span['label'],
19
- 'text': span_chunk['text'],
20
- 'words': [span_chunk]
21
- }
22
- )
23
-
24
- else:
25
- flattened_output_item[annotation_key].append(span)
26
- flattened_output.append(flattened_output_item)
27
- return flattened_output
28
-
29
-
30
- def annotate_image(image_path, annotation_object):
31
- print("Annotating Images")
32
- img = None
33
- image = Image.open(image_path).convert('RGBA')
34
- tmp = image.copy()
35
- label2color = image_label_2_color(annotation_object)
36
- overlay = Image.new('RGBA', tmp.size, (0, 0, 0)+(0,))
37
- draw = ImageDraw.Draw(overlay)
38
- font = ImageFont.load_default()
39
-
40
- predictions = [span['label'] for span in annotation_object['output']]
41
- boxes = [span['words'][0]['box'] for span in annotation_object['output']]
42
- for prediction, box in zip(predictions, boxes):
43
- draw.rectangle(box, outline=label2color[prediction],
44
- width=3, fill=label2color[prediction]+(int(255*0.33),))
45
- draw.text((box[0] + 10, box[1] - 10), text=prediction,
46
- fill=label2color[prediction], font=font)
47
-
48
- img = Image.alpha_composite(tmp, overlay)
49
- img = img.convert("RGB")
50
-
51
- image_name = os.path.basename(image_path)
52
- image_name = image_name[:image_name.find('.')]
53
- output_folder = 'inferenced/'
54
- os.makedirs(output_folder, exist_ok=True)
55
-
56
- img.save(os.path.join(output_folder, f'{image_name}_inference.jpg'))
 
 
 
static/inference/Layoutlmv3_inference/inference_handler.py DELETED
@@ -1,199 +0,0 @@
1
- from .utils import load_model,load_processor,normalize_box,compare_boxes,adjacent
2
- from .annotate_image import get_flattened_output,annotate_image
3
- from PIL import Image,ImageDraw, ImageFont
4
- import logging
5
- import torch
6
- import json
7
- import os
8
-
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
- class ModelHandler(object):
13
- def __init__(self):
14
- self.model = None
15
- self.model_dir = None
16
- self.device = 'cpu'
17
- self.error = None
18
- self.initialized = False
19
- self._raw_input_data = None
20
- self._processed_data = None
21
- self._images_size = None
22
-
23
- def initialize(self, context):
24
- try:
25
- logger.info("Loading transformer model")
26
- self._context = context
27
- properties = self._context
28
- self.model_dir = properties.get("model_dir")
29
- self.model = self.load(self.model_dir)
30
- self.initialized = True
31
- except Exception as e:
32
- logger.error(f"Error initializing model: {str(e)}")
33
- self.error = str(e)
34
-
35
- def preprocess(self, batch):
36
- try:
37
- inference_dict = batch
38
- self._raw_input_data = inference_dict
39
- processor = load_processor()
40
- images = [Image.open(path).convert("RGB")
41
- for path in inference_dict['image_path']]
42
- self._images_size = [img.size for img in images]
43
- words = inference_dict['words']
44
- boxes = [[normalize_box(box, images[i].size[0], images[i].size[1])
45
- for box in doc] for i, doc in enumerate(inference_dict['bboxes'])]
46
- encoded_inputs = processor(
47
- images, words, boxes=boxes, return_tensors="pt", padding="max_length", truncation=True)
48
- self._processed_data = encoded_inputs
49
- return encoded_inputs
50
- except Exception as e:
51
- logger.error(f"Error in preprocessing: {str(e)}")
52
- self.error = str(e)
53
- return None
54
-
55
- def load(self, model_dir):
56
- try:
57
- model = load_model(model_dir)
58
- return model
59
- except Exception as e:
60
- logger.error(f"Error loading LayoutLMv3 model: {str(e)}")
61
- self.error = str(e)
62
- return None
63
-
64
- def inference(self, model_input):
65
- try:
66
- with torch.no_grad():
67
- inference_outputs = self.model(**model_input)
68
- predictions = inference_outputs.logits.argmax(-1).tolist()
69
- results = []
70
- for i in range(len(predictions)):
71
- tmp = dict()
72
- tmp[f'output_{i}'] = predictions[i]
73
- results.append(tmp)
74
- return [results]
75
- except Exception as e:
76
- logger.error(f"Error in inference: {str(e)}")
77
- self.error = str(e)
78
- return None
79
-
80
- def postprocess(self, inference_output):
81
- try:
82
- docs = []
83
- k = 0
84
- for page, doc_words in enumerate(self._raw_input_data['words']):
85
- doc_list = []
86
- width, height = self._images_size[page]
87
- for i, doc_word in enumerate(doc_words, start=0):
88
- word_tagging = None
89
- word_labels = []
90
- word = dict()
91
- word['id'] = k
92
- k += 1
93
- word['text'] = doc_word
94
- word['pageNum'] = page + 1
95
- word['box'] = self._raw_input_data['bboxes'][page][i]
96
- _normalized_box = normalize_box(
97
- self._raw_input_data['bboxes'][page][i], width, height)
98
- for j, box in enumerate(self._processed_data['bbox'].tolist()[page]):
99
- if compare_boxes(box, _normalized_box):
100
- if self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]] != 'O':
101
- word_labels.append(
102
- self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]][2:])
103
- else:
104
- word_labels.append('other')
105
- if word_labels != []:
106
- word_tagging = word_labels[0] if word_labels[0] != 'other' else word_labels[-1]
107
- else:
108
- word_tagging = 'other'
109
- word['label'] = word_tagging
110
- word['pageSize'] = {'width': width, 'height': height}
111
- if word['label'] != 'other':
112
- doc_list.append(word)
113
- spans = []
114
- def adjacents(entity): return [
115
- adj for adj in doc_list if adjacent(entity, adj)]
116
- output_test_tmp = doc_list[:]
117
- for entity in doc_list:
118
- if adjacents(entity) == []:
119
- spans.append([entity])
120
- output_test_tmp.remove(entity)
121
-
122
- while output_test_tmp != []:
123
- span = [output_test_tmp[0]]
124
- output_test_tmp = output_test_tmp[1:]
125
- while output_test_tmp != [] and adjacent(span[-1], output_test_tmp[0]):
126
- span.append(output_test_tmp[0])
127
- output_test_tmp.remove(output_test_tmp[0])
128
- spans.append(span)
129
-
130
- output_spans = []
131
- for span in spans:
132
- if len(span) == 1:
133
- output_span = {"text": span[0]['text'],
134
- "label": span[0]['label'],
135
- "words": [{
136
- 'id': span[0]['id'],
137
- 'box': span[0]['box'],
138
- 'text': span[0]['text']
139
- }],
140
- }
141
- else:
142
- output_span = {"text": ' '.join([entity['text'] for entity in span]),
143
- "label": span[0]['label'],
144
- "words": [{
145
- 'id': entity['id'],
146
- 'box': entity['box'],
147
- 'text': entity['text']
148
- } for entity in span]
149
-
150
- }
151
- output_spans.append(output_span)
152
- docs.append({f'output': output_spans})
153
- return [json.dumps(docs, ensure_ascii=False)]
154
-
155
- except Exception as e:
156
- logger.error(f"Error in postprocessing: {str(e)}")
157
- self.error = str(e)
158
- return None
159
-
160
-
161
- def handle(self, data, context):
162
- try:
163
- if not self.initialized:
164
- self.initialize(context)
165
-
166
- if data is None:
167
- return None
168
-
169
- model_input = self.preprocess(data)
170
- if model_input is None:
171
- return None
172
-
173
- model_out = self.inference(model_input)
174
- if model_out is None:
175
- return None
176
-
177
- inference_out = self.postprocess(model_out)[0]
178
- with open('temp/LayoutlMV3InferenceOutput.json', 'w') as inf_out:
179
- inf_out.write(inference_out)
180
- inference_out_list = json.loads(inference_out)
181
- flattened_output_list = get_flattened_output(inference_out_list)
182
- print('Ready for Annotation')
183
- for i, flattened_output in enumerate(flattened_output_list):
184
- annotate_image(data['image_path'][i], flattened_output)
185
- except Exception as e:
186
- logger.error(f"Error handling request: {str(e)}")
187
- self.error = str(e)
188
-
189
- _service = ModelHandler()
190
-
191
-
192
- def handle(data, context):
193
- if not _service.initialized:
194
- _service.initialize(context)
195
-
196
- if data is None:
197
- return None
198
-
199
- return _service.handle(data, context)
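The core of postprocess() above is the adjacency walk that merges consecutive same-label words into spans. A standalone sketch of that idea, simplified to a single greedy pass over toy data (not the module's API, just the grouping logic):

# Toy stand-in for doc_list: words that share a label and have consecutive ids belong together.
def adjacent(w1, w2):
    return w1['label'] == w2['label'] and abs(w1['id'] - w2['id']) == 1

words = [{'id': 0, 'label': 'TOTAL', 'text': 'TOTAL'},
         {'id': 1, 'label': 'TOTAL', 'text': '9.99'},
         {'id': 3, 'label': 'DATE',  'text': '2024-01-01'}]

spans, current = [], [words[0]]
for w in words[1:]:
    if adjacent(current[-1], w):
        current.append(w)           # extend the running span
    else:
        spans.append(current)       # close it and start a new one
        current = [w]
spans.append(current)

print([' '.join(w['text'] for w in s) for s in spans])   # ['TOTAL 9.99', '2024-01-01']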
 
static/inference/Layoutlmv3_inference/ocr.py DELETED
@@ -1,187 +0,0 @@
1
- import os
2
- import pandas as pd
3
- import cv2
4
- import numpy as np
5
- import json
6
- import requests
7
- import traceback
8
-
9
- from PIL import Image
10
-
11
- def preprocess_image(image_path, max_file_size_mb=1, target_file_size_mb=0.5):
12
- try:
13
- # Check file size
14
- file_size_mb = os.path.getsize(image_path) / (1024 * 1024) # Convert to megabytes
15
- if file_size_mb > max_file_size_mb:
16
- print(f"File size ({file_size_mb} MB) exceeds the maximum allowed size ({max_file_size_mb} MB). Resizing the image.")
17
-
18
- # Read the image
19
- image = cv2.imread(image_path)
20
-
21
- # Calculate the new dimensions to achieve the target file size
22
- ratio = target_file_size_mb / file_size_mb
23
- new_width = int(image.shape[1] * np.sqrt(ratio))
24
- new_height = int(image.shape[0] * np.sqrt(ratio))
25
-
26
- # Enhance text
27
- enhanced_img = enhance_txt(image)
28
-
29
- # Resize the image
30
- enhanced = cv2.resize(enhanced_img, (new_width, new_height))
31
-
32
- return enhanced
33
-
34
- else:
35
- # If the file size is within the limit, proceed with the regular enhancement
36
- image = cv2.imread(image_path)
37
- enhanced = enhance_txt(image)
38
- return enhanced
39
-
40
- except Exception as e:
41
- print(f"An error occurred: {str(e)}")
42
- return None
43
-
44
-
45
- def enhance_txt(img, intensity_increase=20, bilateral_filter_diameter=9, bilateral_filter_sigma_color=75, bilateral_filter_sigma_space=75):
46
- # Get the width and height of the image
47
- w = img.shape[1]
48
- h = img.shape[0]
49
- w1 = int(w * 0.05)
50
- w2 = int(w * 0.95)
51
- h1 = int(h * 0.05)
52
- h2 = int(h * 0.95)
53
- ROI = img[h1:h2, w1:w2] # 95% of the center of the image
54
- threshold = np.mean(ROI) * 0.88 # % of average brightness
55
-
56
- blurred = cv2.GaussianBlur(img, (1, 1), 0)
57
- edged = 255 - cv2.Canny(blurred, 100, 150, apertureSize=7)
58
-
59
- # Increase intensity by adding a constant value
60
- img = np.clip(img + intensity_increase, 0, 255).astype(np.uint8)
61
-
62
- # Apply bilateral filter to reduce noise
63
- img = cv2.bilateralFilter(img, bilateral_filter_diameter, bilateral_filter_sigma_color, bilateral_filter_sigma_space)
64
-
65
- _, binary = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)
66
- return binary
67
-
68
-
69
- def run_tesseract_on_preprocessed_image(preprocessed_image, image_path):
70
- try:
71
- image_name = os.path.basename(image_path)
72
- image_name = image_name[:image_name.find('.')]
73
-
74
- # Create the "temp" folder if it doesn't exist
75
- temp_folder = "temp"
76
- if not os.path.exists(temp_folder):
77
- os.makedirs(temp_folder)
78
-
79
- # Define the OCR API endpoint
80
- url = "https://api.ocr.space/parse/image"
81
-
82
- # Define the API key and the language
83
- api_key = "K88232854988957" # Replace with your actual OCR Space API key
84
- language = "eng"
85
-
86
- # Save the preprocessed image
87
- cv2.imwrite(os.path.join(temp_folder, f"{image_name}_preprocessed.jpg"), preprocessed_image)
88
-
89
- # Open the preprocessed image file as binary
90
- with open(os.path.join(temp_folder, f"{image_name}_preprocessed.jpg"), "rb") as f:
91
- # Define the payload for the API request
92
- payload = {
93
- "apikey": api_key,
94
- "language": language,
95
- "isOverlayRequired": True,
96
- "OCREngine": 2
97
- }
98
- # Define the file parameter for the API request
99
- file = {
100
- "file": f
101
- }
102
- # Send the POST request to the OCR API
103
- response = requests.post(url, data=payload, files=file)
104
-
105
- # Check the status code of the response
106
- if response.status_code == 200:
107
- # Parse the JSON response
108
- result = response.json()
109
- print("---JSON file saved")
110
- # Save the OCR result as JSON
111
- with open(os.path.join(temp_folder, f"{image_name}_ocr.json"), 'w') as f:
112
- json.dump(result, f)
113
-
114
- return os.path.join(temp_folder, f"{image_name}_ocr.json")
115
- else:
116
- # Print the error message
117
- print("Error: " + response.text)
118
- return None
119
-
120
- except Exception as e:
121
- print(f"An error occurred during OCR request: {str(e)}")
122
- return None
123
-
124
- def clean_tesseract_output(json_output_path):
125
- try:
126
- with open(json_output_path, 'r') as json_file:
127
- data = json.load(json_file)
128
-
129
- lines = data['ParsedResults'][0]['TextOverlay']['Lines']
130
-
131
- words = []
132
- for line in lines:
133
- for word_info in line['Words']:
134
- word = {}
135
- origin_box = [
136
- word_info['Left'],
137
- word_info['Top'],
138
- word_info['Left'] + word_info['Width'],
139
- word_info['Top'] + word_info['Height']
140
- ]
141
-
142
- word['word_text'] = word_info['WordText']
143
- word['word_box'] = origin_box
144
- words.append(word)
145
-
146
- return words
147
- except (KeyError, IndexError, FileNotFoundError, json.JSONDecodeError) as e:
148
- print(f"Error cleaning Tesseract output: {str(e)}")
149
- return None
150
-
151
- def prepare_batch_for_inference(image_paths):
152
- # print("my_function was called")
153
- # traceback.print_stack() # This will print the stack trace
154
- print(f"Number of images to process: {len(image_paths)}") # Print the total number of images to be processed
155
- print("1. Preparing for Inference")
156
- tsv_output_paths = []
157
-
158
- inference_batch = dict()
159
- print("2. Starting Preprocessing")
160
- # Ensure that the image is only 1
161
- for image_path in image_paths:
162
- print(f"Processing the image: {image_path}") # Print the image being processed
163
- print("3. Preprocessing the Receipt")
164
- preprocessed_image = preprocess_image(image_path)
165
- if preprocessed_image is not None:
166
- print("4. Preprocessing done. Running OCR")
167
- json_output_path = run_tesseract_on_preprocessed_image(preprocessed_image, image_path)
168
- print("5. OCR Complete")
169
- if json_output_path:
170
- tsv_output_paths.append(json_output_path)
171
-
172
- print("6. Preprocessing and OCR Done")
173
- # clean_outputs is a list of lists
174
- clean_outputs = [clean_tesseract_output(tsv_path) for tsv_path in tsv_output_paths]
175
- print("7. Cleaned OCR output")
176
- word_lists = [[word['word_text'] for word in clean_output] for clean_output in clean_outputs]
177
- print("8. Word List Created")
178
- boxes_lists = [[word['word_box'] for word in clean_output] for clean_output in clean_outputs]
179
- print("9. Box List Created")
180
- inference_batch = {
181
- "image_path": image_paths,
182
- "bboxes": boxes_lists,
183
- "words": word_lists
184
- }
185
-
186
- print("10. Prepared for Inference Batch")
187
- return inference_batch
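preprocess_image() above shrinks oversized uploads before sending them to the OCR.space API. A standalone sketch of just that resize step, keeping the deleted defaults of a 1 MB limit and a 0.5 MB target; the arithmetic is the same square-root scaling:

import os
import cv2
import numpy as np

def resize_to_target_size(image_path, max_mb=1.0, target_mb=0.5):
    size_mb = os.path.getsize(image_path) / (1024 * 1024)
    img = cv2.imread(image_path)
    if size_mb <= max_mb:
        return img                                   # small enough, leave untouched
    # Scaling both sides by sqrt(target/current) shrinks the pixel count
    # (and, roughly, the encoded file size) by the target/current ratio.
    scale = np.sqrt(target_mb / size_mb)
    new_w, new_h = int(img.shape[1] * scale), int(img.shape[0] * scale)
    return cv2.resize(img, (new_w, new_h))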
 
static/inference/Layoutlmv3_inference/utils.py DELETED
@@ -1,50 +0,0 @@
1
- import numpy as np
2
- from transformers import AutoModelForTokenClassification, AutoProcessor
3
-
4
- def normalize_box(bbox, width, height):
5
- return [
6
- int(bbox[0]*(1000/width)),
7
- int(bbox[1]*(1000/height)),
8
- int(bbox[2]*(1000/width)),
9
- int(bbox[3]*(1000/height)),
10
- ]
11
-
12
- def compare_boxes(b1, b2):
13
- b1 = np.array([c for c in b1])
14
- b2 = np.array([c for c in b2])
15
- equal = np.array_equal(b1, b2)
16
- return equal
17
-
18
- def unnormalize_box(bbox, width, height):
19
- return [
20
- width * (bbox[0] / 1000),
21
- height * (bbox[1] / 1000),
22
- width * (bbox[2] / 1000),
23
- height * (bbox[3] / 1000),
24
- ]
25
-
26
- def adjacent(w1, w2):
27
- if w1['label'] == w2['label'] and abs(w1['id'] - w2['id']) == 1:
28
- return True
29
- return False
30
-
31
- def random_color():
32
- return np.random.randint(0, 255, 3)
33
-
34
- def image_label_2_color(annotation):
35
- if 'output' in annotation.keys():
36
- image_labels = set([span['label'] for span in annotation['output']])
37
- label2color = {f'{label}': (random_color()[0], random_color()[
38
- 1], random_color()[2]) for label in image_labels}
39
- return label2color
40
- else:
41
- raise ValueError('please use "output" as annotation key')
42
-
43
- def load_model(model_path):
44
- model = AutoModelForTokenClassification.from_pretrained(model_path)
45
- return model
46
-
47
- def load_processor():
48
- processor = AutoProcessor.from_pretrained(
49
- "microsoft/layoutlmv3-base", apply_ocr=False)
50
- return processor
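A quick worked example of the box scaling above: LayoutLMv3 expects coordinates on a 0-1000 grid, so normalize_box() rescales by the page size and unnormalize_box() reverses it (the numbers here are made up):

width, height = 1280, 960
box = [64, 48, 640, 480]                       # pixel coordinates from the OCR step

normalized = [int(box[0] * 1000 / width),  int(box[1] * 1000 / height),
              int(box[2] * 1000 / width),  int(box[3] * 1000 / height)]
assert normalized == [50, 50, 500, 500]        # what normalize_box() returns

restored = [width * normalized[0] / 1000, height * normalized[1] / 1000,
            width * normalized[2] / 1000, height * normalized[3] / 1000]
assert restored == [64.0, 48.0, 640.0, 480.0]  # what unnormalize_box() returns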
 
static/inference/preprocess.py DELETED
@@ -1,206 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import os
4
- import argparse
5
- from datasets.features import ClassLabel
6
- from transformers import AutoProcessor
7
- from sklearn.model_selection import train_test_split
8
- from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D, Dataset
9
- from datasets import Image as Img
10
- from PIL import Image
11
- from tqdm import tqdm_notebook # Import tqdm_notebook for displaying progress bars
12
-
13
-
14
- import warnings
15
- warnings.filterwarnings('ignore')
16
-
17
-
18
- def read_text_file(file_path):
19
- with open(file_path, 'r') as f:
20
- return (f.readlines())
21
-
22
-
23
- def prepare_examples(examples):
24
- images = examples[image_column_name]
25
- words = examples[text_column_name]
26
- boxes = examples[boxes_column_name]
27
- word_labels = examples[label_column_name]
28
-
29
- encoding = processor(images, words, boxes=boxes, word_labels=word_labels,
30
- truncation=True, padding="max_length")
31
-
32
- return encoding
33
-
34
-
35
- def get_zip_dir_name():
36
- try:
37
- os.chdir('/kaggle/input/ocr-combinedrec')
38
-
39
- dir_list1 = os.listdir()
40
- dir_list = sorted(dir_list1)
41
-
42
- any_file_name = dir_list[0]
43
- # Using os.path.splitext to get the file extension
44
- zip_dir_name, file_extension = os.path.splitext(any_file_name)
45
-
46
- # Extracting the directory name using os.path.dirname
47
- # zip_dir_name = os.path.dirname(any_file_name)
48
-
49
- # Test
50
-
51
- return 'dataset_files'
52
-
53
- # Check if all files start with the extracted directory name
54
- print(all(list(map(lambda x: x.startswith(zip_dir_name), dir_list))))
55
- if all(list(map(lambda x: x.startswith(zip_dir_name), dir_list))):
56
- return zip_dir_name
57
- return False
58
- finally:
59
- os.chdir('./../')
60
-
61
- def filter_out_unannotated(example):
62
- tags = example['ner_tags']
63
- return not all([tag == label2id['O'] for tag in tags])
64
-
65
-
66
-
67
- if __name__ == '__main__':
68
-
69
- parser = argparse.ArgumentParser()
70
- parser.add_argument('--valid_size')
71
- parser.add_argument('--output_path')
72
- args = parser.parse_args()
73
- TEST_SIZE = float(args.valid_size)
74
- OUTPUT_PATH = args.output_path
75
-
76
- os.makedirs(args.output_path, exist_ok=True)
77
- files = {}
78
- zip_dir_name = get_zip_dir_name()
79
-
80
- if zip_dir_name:
81
- files['train_box'] = read_text_file('/kaggle/input/ocr-combinedrec/dataset_files_box.txt')
82
-
83
- files['train_image'] = read_text_file(os.path.join(
84
- os.curdir, 'ocr-combinedrec', f'{zip_dir_name}_image.txt'))
85
- files['train'] = read_text_file(os.path.join(
86
- os.curdir, 'ocr-combinedrec', f'{zip_dir_name}.txt'))
87
- else:
88
- for f in os.listdir():
89
- if f.endswith('.txt') and f.find('box') != -1:
90
- files['train_box'] = read_text_file(os.path.join(os.curdir, f))
91
- elif f.endswith('.txt') and f.find('image') != -1:
92
- files['train_image'] = read_text_file(
93
- os.path.join(os.curdir, f))
94
- elif f.endswith('.txt') and f.find('labels') == -1:
95
- files['train'] = read_text_file(os.path.join(os.curdir, f))
96
-
97
- assert(len(files['train']) == len(files['train_box']))
98
- assert(len(files['train_box']) == len(files['train_image']))
99
- assert(len(files['train_image']) == len(files['train']))
100
-
101
- images = {}
102
- for i, row in enumerate(files['train_image']):
103
- if row != '\n':
104
- image_name = row.split('\t')[-1]
105
- images.setdefault(image_name.replace('\n', ''), []).append(i)
106
-
107
- words, bboxes, ner_tags, image_path = [], [], [], []
108
- for image, rows in images.items():
109
- words.append([row.split('\t')[0].replace('\n', '')
110
- for row in files['train'][rows[0]:rows[-1]+1]])
111
- ner_tags.append([row.split('\t')[1].replace('\n', '')
112
- for row in files['train'][rows[0]:rows[-1]+1]])
113
- bboxes.append([box.split('\t')[1].replace('\n', '')
114
- for box in files['train_box'][rows[0]:rows[-1]+1]])
115
- if zip_dir_name:
116
- image_path.append(f"/kaggle/input/ocr-combinedrec/{zip_dir_name}/{image}")
117
- else:
118
- image_path.append(f"/kaggle/input/ocr-combinedrec/{image}")
119
-
120
- labels = list(set([tag for doc_tag in ner_tags for tag in doc_tag]))
121
- id2label = {v: k for v, k in enumerate(labels)}
122
- label2id = {k: v for v, k in enumerate(labels)}
123
-
124
- dataset_dict = {
125
- 'id': range(len(words)),
126
- 'tokens': words,
127
- 'bboxes': [[list(map(int, bbox.split())) for bbox in doc] for doc in bboxes],
128
- 'ner_tags': [[label2id[tag] for tag in ner_tag] for ner_tag in ner_tags],
129
- 'image': [Image.open(path).convert("RGB") for path in image_path]
130
- }
131
-
132
- #raw features
133
- features = Features({
134
- 'id': Value(dtype='string', id=None),
135
- 'tokens': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
136
- 'bboxes': Sequence(feature=Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None), length=-1, id=None),
137
- 'ner_tags': Sequence(feature=ClassLabel(num_classes=len(labels), names=labels, names_file=None, id=None), length=-1, id=None),
138
- 'image': Img(decode=True, id=None)
139
- })
140
-
141
- full_data_set = Dataset.from_dict(dataset_dict, features=features)
142
- dataset = full_data_set.train_test_split(test_size=TEST_SIZE)
143
- dataset["train"] = dataset["train"].filter(filter_out_unannotated)
144
- processor = AutoProcessor.from_pretrained(
145
- "microsoft/layoutlmv3-base", apply_ocr=False)
146
-
147
- features = dataset["train"].features
148
- column_names = dataset["train"].column_names
149
- image_column_name = "image"
150
- text_column_name = "tokens"
151
- boxes_column_name = "bboxes"
152
- label_column_name = "ner_tags"
153
-
154
- # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the
155
- # unique labels.
156
-
157
-
158
- # def get_label_list(labels):
159
- # unique_labels = set()
160
- # for label in labels:
161
- # unique_labels = unique_labels | set(label)
162
- # label_list = list(unique_labels)
163
- # label_list.sort()
164
- # return label_list
165
-
166
-
167
- # if isinstance(features[label_column_name].feature, ClassLabel):
168
- # label_list = features[label_column_name].feature.names
169
- # # No need to convert the labels since they are already ints.
170
- # id2label = {k: v for k, v in enumerate(label_list)}
171
- # label2id = {v: k for k, v in enumerate(label_list)}
172
- # else:
173
- # label_list = get_label_list(dataset["train"][label_column_name])
174
- # id2label = {k: v for k, v in enumerate(label_list)}
175
- # label2id = {v: k for k, v in enumerate(label_list)}
176
- # num_labels = len(label_list)
177
-
178
-
179
-
180
- # we need to define custom features for `set_format` (used later on) to work properly
181
- features = Features({
182
- 'pixel_values': Array3D(dtype="float32", shape=(3, 224, 224)),
183
- 'input_ids': Sequence(feature=Value(dtype='int64')),
184
- 'attention_mask': Sequence(Value(dtype='int64')),
185
- 'bbox': Array2D(dtype="int64", shape=(512, 4)),
186
- 'labels': Sequence(ClassLabel(names=labels)),
187
- })
188
-
189
- train_dataset = dataset["train"].map(
190
- prepare_examples,
191
- batched=True,
192
- remove_columns=column_names,
193
- features=features,
194
- )
195
- eval_dataset = dataset["test"].map(
196
- prepare_examples,
197
- batched=True,
198
- remove_columns=column_names,
199
- features=features,
200
- )
201
- train_dataset.set_format("torch")
202
- if not OUTPUT_PATH.endswith('/'):
203
- OUTPUT_PATH += '/'
204
- train_dataset.save_to_disk(f'{OUTPUT_PATH}train_split')
205
- eval_dataset.save_to_disk(f'{OUTPUT_PATH}eval_split')
206
- dataset.save_to_disk(f'{OUTPUT_PATH}raw_data')
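The tail of the script above is a standard datasets split-and-save pattern. A toy sketch of just that pattern, assuming the datasets library is installed; the column contents and output paths are placeholders:

from datasets import Dataset

toy = Dataset.from_dict({
    'id': ['0', '1', '2', '3'],
    'tokens': [['TOTAL', '9.99'], ['DATE', '01/01'], ['CASH', '10.00'], ['CHANGE', '0.01']],
})
splits = toy.train_test_split(test_size=0.25)       # same role as TEST_SIZE above
splits['train'].save_to_disk('preprocessed/train_split')
splits['test'].save_to_disk('preprocessed/eval_split')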
 
static/inference/run_inference.py DELETED
@@ -1,27 +0,0 @@
1
- import argparse
2
- from asyncio.log import logger
3
- from Layoutlmv3_inference.ocr import prepare_batch_for_inference
4
- from Layoutlmv3_inference.inference_handler import handle
5
- import logging
6
- import os
7
-
8
- if __name__ == "__main__":
9
- try:
10
- parser = argparse.ArgumentParser()
11
- parser.add_argument("--model_path", type=str)
12
- parser.add_argument("--images_path", type=str)
13
- args, _ = parser.parse_known_args()
14
- images_path = args.images_path
15
- image_files = os.listdir(images_path)
16
- images_path = [images_path + '/' + image_files[0]]
17
- inference_batch = prepare_batch_for_inference(images_path)
18
- context = {"model_dir": args.model_path}
19
- handle(inference_batch,context)
20
- except Exception as err:
21
- os.makedirs('log', exist_ok=True)
22
- logging.basicConfig(filename='log/error_output.log', level=logging.ERROR,
23
- format='%(asctime)s %(levelname)s %(name)s %(message)s')
24
- logger = logging.getLogger(__name__)
25
- logger.error(err)
26
-
27
-
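For reference, the programmatic equivalent of what this removed wrapper did, assuming the surviving Layoutlmv3_inference package exposes the same entry points; the model directory and upload folder are placeholders:

import os
from Layoutlmv3_inference.ocr import prepare_batch_for_inference
from Layoutlmv3_inference.inference_handler import handle

images_dir = 'static/temp/uploads'
first_image = os.path.join(images_dir, sorted(os.listdir(images_dir))[0])   # the script only processed one file

inference_batch = prepare_batch_for_inference([first_image])   # preprocessing + OCR, returns words/boxes per image
handle(inference_batch, {"model_dir": "model/export"})         # LayoutLMv3 inference, JSON dump, annotated JPG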