Scezui committed
Commit fd03164
1 Parent(s): 5bc4c7c

fixed bugs in create_csv

Files changed (36)
  1. Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc +0 -0
  2. Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc +0 -0
  3. Layoutlmv3_inference/annotate_image.py +1 -1
  4. app.py +46 -49
  5. experiment.ipynb +0 -1337
  6. inferenced/csv_files/Output_0.csv +0 -4
  7. inferenced/csv_files/Output_1.csv +0 -2
  8. inferenced/csv_files/Output_2.csv +0 -3
  9. inferenced/csv_files/Output_3.csv +0 -2
  10. inferenced/csv_files/Output_4.csv +0 -2
  11. inferenced/output.csv +0 -9
  12. inferenced/sample1_711_inference.jpg +0 -0
  13. inferenced/sample1_grace_inference.jpg +0 -0
  14. inferenced/sample_711_inference.jpg +0 -0
  15. inferenced/sample_coop_inference.jpg +0 -0
  16. inferenced/sample_grace_inference.jpg +0 -0
  17. log/error_output.log +18 -0
  18. static/inference/Layoutlmv3_inference/__init__.py +0 -0
  19. static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-310.pyc +0 -0
  20. static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-311.pyc +0 -0
  21. static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-312.pyc +0 -0
  22. static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-310.pyc +0 -0
  23. static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc +0 -0
  24. static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-310.pyc +0 -0
  25. static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc +0 -0
  26. static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-310.pyc +0 -0
  27. static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-311.pyc +0 -0
  28. static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-312.pyc +0 -0
  29. static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-310.pyc +0 -0
  30. static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-311.pyc +0 -0
  31. static/inference/Layoutlmv3_inference/annotate_image.py +0 -56
  32. static/inference/Layoutlmv3_inference/inference_handler.py +0 -199
  33. static/inference/Layoutlmv3_inference/ocr.py +0 -187
  34. static/inference/Layoutlmv3_inference/utils.py +0 -50
  35. static/inference/preprocess.py +0 -206
  36. static/inference/run_inference.py +0 -27
Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc CHANGED
Binary files a/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc and b/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc differ
 
Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc CHANGED
Binary files a/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc and b/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc differ
 
Layoutlmv3_inference/annotate_image.py CHANGED
@@ -50,7 +50,7 @@ def annotate_image(image_path, annotation_object):
 
     image_name = os.path.basename(image_path)
     image_name = image_name[:image_name.find('.')]
-    output_folder = 'inferenced/'
+    output_folder = 'static/temp/inferenced/'
    os.makedirs(output_folder, exist_ok=True)
 
    img.save(os.path.join(output_folder, f'{image_name}_inference.jpg'))
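For orientation, a minimal sketch of where annotated images land after this change, assuming a hypothetical input filename (the path below is illustrative, not taken from the repository):

    import os

    # Hypothetical input path, for illustration only.
    image_path = 'static/temp/sample_711.jpg'

    image_name = os.path.basename(image_path)        # 'sample_711.jpg'
    image_name = image_name[:image_name.find('.')]   # 'sample_711'

    output_folder = 'static/temp/inferenced/'        # new location introduced by this commit
    os.makedirs(output_folder, exist_ok=True)

    print(os.path.join(output_folder, f'{image_name}_inference.jpg'))
    # -> static/temp/inferenced/sample_711_inference.jpg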
app.py CHANGED
@@ -60,7 +60,7 @@ def index():
 
     # Source folders
     temp_folder = r'static/temp'
-    inferenced_folder = r'inferenced'
+    inferenced_folder = r'static/temp/inferenced'
 
     # Destination folder path
     destination_folder = os.path.join('output_folders', dt_string) # Create a new folder with timestamp
@@ -104,8 +104,8 @@ def make_predictions(image_paths):
     temp = None
     try:
         # For Windows OS
-        # temp = pathlib.PosixPath # Save the original state
-        # pathlib.PosixPath = pathlib.WindowsPath # Change to WindowsPath temporarily
+        temp = pathlib.PosixPath # Save the original state
+        pathlib.PosixPath = pathlib.WindowsPath # Change to WindowsPath temporarily
 
         model_path = Path(r'model/export')
         learner = load_learner(model_path)
@@ -129,8 +129,8 @@ def make_predictions(image_paths):
     except Exception as e:
         return {"error in make_predictions": str(e)}
 
-    # finally:
-    #     pathlib.PosixPath = temp
+    finally:
+        pathlib.PosixPath = temp
 
 import copy
 @app.route('/predict/<filenames>', methods=['GET', 'POST'])
@@ -181,7 +181,7 @@ def predict_files(filenames):
 @app.route('/get_inference_image')
 def get_inference_image():
     # Assuming the new image is stored in the 'inferenced' folder with the name 'temp_inference.jpg'
-    inferenced_image = 'inferenced/temp_inference.jpg'
+    inferenced_image = 'static/temp/inferenced/temp_inference.jpg'
     return jsonify(updatedImagePath=inferenced_image), 200 # Return the image path with a 200 status code
 
 
@@ -231,7 +231,6 @@ def replace_symbols_with_period(value):
     return value.replace(',', '.')
 
 
-from itertools import zip_longest
 
 @app.route('/create_csv', methods=['GET'])
 def create_csv():
@@ -240,11 +239,14 @@ def create_csv():
         json_folder_path = r"static/temp/labeled" # Change this to your folder path
 
         # Path to the output CSV folder
-        output_folder_path = r"inferenced/csv_files"
+        output_folder_path = r"static/temp/inferenced/csv_files"
         os.makedirs(output_folder_path, exist_ok=True)
 
-        # Initialize an empty list to store all JSON data
-        all_data = []
+        column_order = [
+            'RECEIPTNUMBER', 'MERCHANTNAME', 'MERCHANTADDRESS',
+            'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
+            'PRICE', 'TOTAL', 'VATTAX'
+        ]
 
         # Iterate through JSON files in the folder
         for filename in os.listdir(json_folder_path):
@@ -253,57 +255,39 @@ def create_csv():
 
                 with open(json_file_path, 'r') as file:
                     data = json.load(file)
-                    all_data.extend(data['output'])
+                    all_data = data.get('output', [])
 
-                    # Creating a dictionary to store labels and corresponding texts for this JSON file
+                    # Initialize a dictionary to store labels and corresponding texts for this JSON file
                     label_texts = {}
-                    for item in data['output']:
+                    for item in all_data:
                         label = item['label']
                         text = item['text']
-
-                        # Ensure label exists before adding to dictionary
-                        if label not in label_texts:
-                            label_texts[label] = []
-                        label_texts[label].append(text)
-
-                    # Order of columns as requested
-                    column_order = [
-                        'RECEIPTNUMBER', 'MERCHANTNAME', 'MERCHANTADDRESS',
-                        'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
-                        'PRICE', 'TOTAL', 'VATTAX'
-                    ]
+                        label_texts[label] = text
 
                     # Writing data to CSV file with ordered columns
                     csv_file_path = os.path.join(output_folder_path, os.path.splitext(filename)[0] + '.csv')
                     with open(csv_file_path, 'w', newline='') as csvfile:
                         csv_writer = csv.DictWriter(csvfile, fieldnames=column_order, delimiter=",")
-                        csv_writer.writeheader()
+                        if os.path.getsize(csv_file_path) == 0:
+                            csv_writer.writeheader()
 
-                        # Iterate through items and prices
-                        max_length = max(len(label_texts.get('ITEMS', [])), len(label_texts.get('PRICE', [])))
+                        # Constructing rows for the CSV file
+                        items = label_texts.get('ITEMS', '').split()
+                        prices = label_texts.get('PRICE', '').split()
+                        max_length = max(len(items), len(prices))
                         for i in range(max_length):
-                            # Use get() with default '' to avoid KeyError
-                            items = label_texts.get('ITEMS', [])[i] if i < len(label_texts.get('ITEMS', [])) else ''
-                            prices = label_texts.get('PRICE', [])[i] if i < len(label_texts.get('PRICE', [])) else ''
-
+                            row_data = {}
+                            for label in column_order:
+                                # Use get() with default '' to handle missing labels gracefully
+                                row_data[label] = label_texts.get(label, '')
                             # Check if items and prices are separated by space
-                            if ' ' in items or ' ' in prices:
-                                item_list = items.split() if items else []
-                                price_list = prices.split() if prices else []
-
-                                # Create new rows for each combination of items and prices
-                                for item, price in zip(item_list, price_list):
-                                    row_data = {label: replace_symbols_with_period(label_texts[label][i]) if label == 'ITEMS' else replace_symbols_with_period(label_texts[label][i]) for label in column_order}
-                                    row_data['ITEMS'] = item
-                                    row_data['PRICE'] = price
-                                    csv_writer.writerow(row_data)
-                            else:
-                                # Use get() with default '' to avoid KeyError
-                                row_data = {label: replace_symbols_with_period(label_texts.get(label, [])[i]) if i < len(label_texts.get(label, [])) else '' for label in column_order}
-                                csv_writer.writerow(row_data)
+                            if i < len(items) and i < len(prices):
+                                row_data['ITEMS'] = items[i]
+                                row_data['PRICE'] = prices[i]
+                            csv_writer.writerow(row_data)
 
         # Combining contents of CSV files into a single CSV file
-        output_file_path = r"inferenced/output.csv"
+        output_file_path = r"static/temp/inferenced/output.csv"
         with open(output_file_path, 'w', newline='') as combined_csvfile:
             combined_csv_writer = csv.DictWriter(combined_csvfile, fieldnames=column_order, delimiter=",")
             combined_csv_writer.writeheader()
@@ -324,17 +308,30 @@ def create_csv():
     except Exception as e:
         print(f"An error occurred in create_csv: {str(e)}")
         return None
+
+    except FileNotFoundError as e:
+        print(f"File not found error: {str(e)}")
+        return jsonify({'error': 'File not found.'}), 404
+    except json.JSONDecodeError as e:
+        print(f"JSON decoding error: {str(e)}")
+        return jsonify({'error': 'JSON decoding error.'}), 500
+    except csv.Error as e:
+        print(f"CSV error: {str(e)}")
+        return jsonify({'error': 'CSV error.'}), 500
+    except Exception as e:
+        print(f"An unexpected error occurred: {str(e)}")
+        return jsonify({'error': 'An unexpected error occurred.'}), 500
 
 @app.route('/get_data')
 def get_data():
-    return send_from_directory('inferenced','output.csv', as_attachment=False)
+    return send_from_directory('static/temp/inferenced','output.csv', as_attachment=False)
 
 from flask import jsonify
 
 @app.route('/download_csv', methods=['GET'])
 def download_csv():
     try:
-        output_file_path = r"inferenced/output.csv" # path to output CSV file
+        output_file_path = r"static/temp/inferenced/output.csv" # path to output CSV file
         # Check if the file exists
         if os.path.exists(output_file_path):
             return send_file(output_file_path, as_attachment=True, download_name='output.csv')
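For context on the create_csv fix, here is a minimal, self-contained sketch of the new row-building behaviour, assuming a hypothetical labeled output for one receipt (sample_output below is illustrative data, not taken from the repository):

    import csv
    import io

    # Hypothetical labeled output for one receipt, mimicking data['output'] in the JSON files.
    sample_output = [
        {'label': 'MERCHANTNAME', 'text': '7-Eleven'},
        {'label': 'TRANSACTIONDATE', 'text': '2023-12-16'},
        {'label': 'ITEMS', 'text': 'KoreanBun ChocVron'},   # space-separated item names
        {'label': 'PRICE', 'text': '55.00 19.00'},          # space-separated prices
        {'label': 'TOTAL', 'text': '74.00'},
    ]

    column_order = [
        'RECEIPTNUMBER', 'MERCHANTNAME', 'MERCHANTADDRESS',
        'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
        'PRICE', 'TOTAL', 'VATTAX'
    ]

    # One text value per label, as in the reworked create_csv.
    label_texts = {item['label']: item['text'] for item in sample_output}

    items = label_texts.get('ITEMS', '').split()
    prices = label_texts.get('PRICE', '').split()
    max_length = max(len(items), len(prices))

    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=column_order, delimiter=",")
    writer.writeheader()

    for i in range(max_length):
        # Repeat the receipt-level fields on every row, defaulting missing labels to ''.
        row_data = {label: label_texts.get(label, '') for label in column_order}
        if i < len(items) and i < len(prices):
            row_data['ITEMS'] = items[i]    # one item per row
            row_data['PRICE'] = prices[i]   # paired with the price at the same position
        writer.writerow(row_data)

    print(buffer.getvalue())

Each space-separated item is paired with the price at the same position and written as its own row, with the remaining receipt fields repeated; this mirrors the writerow loop added to create_csv in this commit.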
experiment.ipynb DELETED
@@ -1,1337 +0,0 @@
- (entire notebook removed; its 1,337 lines of notebook JSON are condensed below)
-
- # Cell 1: defining inference parameters
- model_path = r"C:\Users\Ayoo\Desktop\webapp\model" # path to Layoutlmv3 model
- imag_path = r"C:\Users\Ayoo\Desktop\webapp\predictions\imgs" # images folder
-
- # Cell 2: run the inference script
- ! python predictions\inference\run_inference.py --model_path {model_path} --images_path {imag_path}
- # Captured output: TensorFlow/oneDNN notices, a transformers "device" deprecation warning, then the
- # pipeline stages from "Preparing for Inference" and "Preprocessing" through "Annotating Images".
-
- # Cell 3: build a keras-ocr pipeline (interrupted)
- import keras_ocr
- pipeline = keras_ocr.pipeline.Pipeline()
- # Captured output: "Looking for C:\Users\Ayoo\.keras-ocr\craft_mlt_25k.h5", followed by a
- # KeyboardInterrupt traceback raised while downloading and verifying the CRAFT detector weights.
-
- # Final visible cell: keras-ocr recognition output for a sample 7-Eleven receipt, a long list of
- # (word, 4x2 bounding-box array) tuples; truncated in this view.
1082
- " array([[ 453.71777, 2884.122 ],\n",
1083
- " [ 548.9033 , 2884.122 ],\n",
1084
- " [ 548.9033 , 2928.542 ],\n",
1085
- " [ 453.71777, 2928.542 ]], dtype=float32)),\n",
1086
- " ('fper',\n",
1087
- " array([[ 577.459 , 2884.122 ],\n",
1088
- " [ 647.2617, 2884.122 ],\n",
1089
- " [ 647.2617, 2928.542 ],\n",
1090
- " [ 577.459 , 2928.542 ]], dtype=float32)),\n",
1091
- " ('dii',\n",
1092
- " array([[ 31.728516, 2934.8877 ],\n",
1093
- " [ 101.53125 , 2934.8877 ],\n",
1094
- " [ 101.53125 , 2979.3076 ],\n",
1095
- " [ 31.728516, 2979.3076 ]], dtype=float32)),\n",
1096
- " ('fair',\n",
1097
- " array([[ 117.39551, 2934.8877 ],\n",
1098
- " [ 209.4082 , 2934.8877 ],\n",
1099
- " [ 209.4082 , 2979.3076 ],\n",
1100
- " [ 117.39551, 2979.3076 ]], dtype=float32)),\n",
1101
- " ('trade',\n",
1102
- " array([[ 222.09961, 2934.8877 ],\n",
1103
- " [ 333.1494 , 2934.8877 ],\n",
1104
- " [ 333.1494 , 2979.3076 ],\n",
1105
- " [ 222.09961, 2979.3076 ]], dtype=float32)),\n",
1106
- " ('permt',\n",
1107
- " array([[ 346.57706, 2933.5906 ],\n",
1108
- " [ 458.4858 , 2939.4805 ],\n",
1109
- " [ 456.2683 , 2981.6128 ],\n",
1110
- " [ 344.35956, 2975.7231 ]], dtype=float32)),\n",
1111
- " ('nunbers',\n",
1112
- " array([[ 494.96484, 2934.8877 ],\n",
1113
- " [ 644.08887, 2934.8877 ],\n",
1114
- " [ 644.08887, 2979.3076 ],\n",
1115
- " [ 494.96484, 2979.3076 ]], dtype=float32)),\n",
1116
- " ('t',\n",
1117
- " array([[ 453.71777, 2941.2334 ],\n",
1118
- " [ 475.92773, 2941.2334 ],\n",
1119
- " [ 475.92773, 2976.1348 ],\n",
1120
- " [ 453.71777, 2976.1348 ]], dtype=float32)),\n",
1121
- " ('18015',\n",
1122
- " array([[ 117.39551, 2985.6533 ],\n",
1123
- " [ 247.48242, 2985.6533 ],\n",
1124
- " [ 247.48242, 3030.0732 ],\n",
1125
- " [ 117.39551, 3030.0732 ]], dtype=float32)),\n",
1126
- " ('series',\n",
1127
- " array([[ 263.34668, 2985.6533 ],\n",
1128
- " [ 396.60645, 2985.6533 ],\n",
1129
- " [ 396.60645, 3030.0732 ],\n",
1130
- " [ 263.34668, 3030.0732 ]], dtype=float32)),\n",
1131
- " ('of',\n",
1132
- " array([[ 409.29785, 2985.6533 ],\n",
1133
- " [ 456.89062, 2985.6533 ],\n",
1134
- " [ 456.89062, 3030.0732 ],\n",
1135
- " [ 409.29785, 3030.0732 ]], dtype=float32)),\n",
1136
- " ('edz5',\n",
1137
- " array([[ 472.75488, 2985.6533 ],\n",
1138
- " [ 571.1133 , 2985.6533 ],\n",
1139
- " [ 571.1133 , 3026.9004 ],\n",
1140
- " [ 472.75488, 3026.9004 ]], dtype=float32)),\n",
1141
- " ('facebooks',\n",
1142
- " array([[ 53.938477, 3036.419 ],\n",
1143
- " [ 234.79102 , 3036.419 ],\n",
1144
- " [ 234.79102 , 3080.8389 ],\n",
1145
- " [ 53.938477, 3080.8389 ]], dtype=float32)),\n",
1146
- " ('71',\n",
1147
- " array([[ 329.97656, 3036.419 ],\n",
1148
- " [ 371.22363, 3036.419 ],\n",
1149
- " [ 371.22363, 3077.666 ],\n",
1150
- " [ 329.97656, 3077.666 ]], dtype=float32)),\n",
1151
- " ('iphi',\n",
1152
- " array([[ 371.22363, 3036.419 ],\n",
1153
- " [ 456.89062, 3036.419 ],\n",
1154
- " [ 456.89062, 3080.8389 ],\n",
1155
- " [ 371.22363, 3080.8389 ]], dtype=float32)),\n",
1156
- " ('comf',\n",
1157
- " array([[ 241.13672, 3039.5918 ],\n",
1158
- " [ 333.1494 , 3039.5918 ],\n",
1159
- " [ 333.1494 , 3080.8389 ],\n",
1160
- " [ 241.13672, 3080.8389 ]], dtype=float32)),\n",
1161
- " ('l',\n",
1162
- " array([[ 456.89062, 3042.7646 ],\n",
1163
- " [ 469.58203, 3042.7646 ],\n",
1164
- " [ 469.58203, 3071.3203 ],\n",
1165
- " [ 456.89062, 3071.3203 ]], dtype=float32)),\n",
1166
- " ('pp',\n",
1167
- " array([[ 491.792 , 3039.5918],\n",
1168
- " [ 542.5576, 3039.5918],\n",
1169
- " [ 542.5576, 3080.8389],\n",
1170
- " [ 491.792 , 3080.8389]], dtype=float32)),\n",
1171
- " ('fes',\n",
1172
- " array([[ 552.0762 , 3039.5918 ],\n",
1173
- " [ 631.39746, 3039.5918 ],\n",
1174
- " [ 631.39746, 3077.666 ],\n",
1175
- " [ 552.0762 , 3077.666 ]], dtype=float32)),\n",
1176
- " ('this',\n",
1177
- " array([[ 53.938477, 3137.9502 ],\n",
1178
- " [ 142.77832 , 3137.9502 ],\n",
1179
- " [ 142.77832 , 3185.543 ],\n",
1180
- " [ 53.938477, 3185.543 ]], dtype=float32)),\n",
1181
- " ('is',\n",
1182
- " array([[ 158.64258, 3137.9502 ],\n",
1183
- " [ 206.23535, 3137.9502 ],\n",
1184
- " [ 206.23535, 3185.543 ],\n",
1185
- " [ 158.64258, 3185.543 ]], dtype=float32)),\n",
1186
- " ('official',\n",
1187
- " array([[ 282.3838 , 3137.9502 ],\n",
1188
- " [ 456.89062, 3137.9502 ],\n",
1189
- " [ 456.89062, 3185.543 ],\n",
1190
- " [ 282.3838 , 3185.543 ]], dtype=float32)),\n",
1191
- " ('receift',\n",
1192
- " array([[ 472.75488, 3137.9502 ],\n",
1193
- " [ 628.2246 , 3137.9502 ],\n",
1194
- " [ 628.2246 , 3185.543 ],\n",
1195
- " [ 472.75488, 3185.543 ]], dtype=float32)),\n",
1196
- " ('in',\n",
1197
- " array([[ 222.09961, 3141.123 ],\n",
1198
- " [ 269.69238, 3141.123 ],\n",
1199
- " [ 269.69238, 3182.37 ],\n",
1200
- " [ 222.09961, 3182.37 ]], dtype=float32))]]"
1201
- ]
1202
- },
1203
- "execution_count": 4,
1204
- "metadata": {},
1205
- "output_type": "execute_result"
1206
- }
1207
- ],
1208
- "source": [
1209
- "pipeline.recognize([r\"temp\\20230508_122035_preprocessed.png\"])"
1210
- ]
1211
- },
1212
- {
1213
- "cell_type": "code",
1214
- "execution_count": 5,
1215
- "metadata": {},
1216
- "outputs": [
1217
- {
1218
- "name": "stdout",
1219
- "output_type": "stream",
1220
- "text": [
1221
- "Requirement already satisfied: requests in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (2.31.0)\n",
1222
- "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (3.2.0)\n",
1223
- "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (3.4)\n",
1224
- "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (1.26.16)\n",
1225
- "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (2023.7.22)\n"
1226
- ]
1227
- }
1228
- ],
1229
- "source": [
1230
- "!pip install requests"
1231
- ]
1232
- },
1233
- {
1234
- "cell_type": "code",
1235
- "execution_count": 16,
1236
- "metadata": {},
1237
- "outputs": [
1238
- {
1239
- "name": "stdout",
1240
- "output_type": "stream",
1241
- "text": [
1242
- "{'ParsedResults': [{'TextOverlay': {'Lines': [{'LineText': '7-ELEVEN.', 'Words': [{'WordText': '7', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': '-', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': 'ELEVEN', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': '.', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}], 'MaxHeight': 84.0, 'MinTop': 38.0}, {'LineText': 'NHJ Convenience Store', 'Words': [{'WordText': 'NHJ', 'Left': 117.0, 'Top': 215.0, 'Height': 36.0, 'Width': 76.0}, {'WordText': 'Convenience', 'Left': 198.0, 'Top': 215.0, 'Height': 36.0, 'Width': 247.0}, {'WordText': 'Store', 'Left': 450.0, 'Top': 215.0, 'Height': 36.0, 'Width': 114.0}], 'MaxHeight': 36.0, 'MinTop': 215.0}, {'LineText': 'Owned & Operated by: Nancy A.', 'Words': [{'WordText': 'Owned', 'Left': 33.0, 'Top': 260.0, 'Height': 52.0, 'Width': 117.0}, {'WordText': '&', 'Left': 156.0, 'Top': 261.0, 'Height': 52.0, 'Width': 32.0}, {'WordText': 'Operated', 'Left': 195.0, 'Top': 261.0, 'Height': 52.0, 'Width': 182.0}, {'WordText': 'by', 'Left': 384.0, 'Top': 261.0, 'Height': 52.0, 'Width': 71.0}, {'WordText': ':', 'Left': 384.0, 'Top': 261.0, 'Height': 52.0, 'Width': 71.0}, {'WordText': 'Nancy', 'Left': 462.0, 'Top': 261.0, 'Height': 52.0, 'Width': 130.0}, {'WordText': 'A', 'Left': 598.0, 'Top': 260.0, 'Height': 52.0, 'Width': 47.0}, {'WordText': '.', 'Left': 598.0, 'Top': 260.0, 'Height': 52.0, 'Width': 47.0}], 'MaxHeight': 52.0, 'MinTop': 260.0}, {'LineText': 'Climacosa', 'Words': [{'WordText': 'Climacosa', 'Left': 244.0, 'Top': 315.0, 'Height': 38.0, 'Width': 193.0}], 'MaxHeight': 38.0, 'MinTop': 315.0}, {'LineText': 'VATREGTIN #933-598-685-002', 'Words': [{'WordText': 'VATREGTIN', 'Left': 75.0, 'Top': 361.0, 'Height': 43.0, 'Width': 204.0}, {'WordText': '#', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '933', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '598', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '685', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '002', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}], 'MaxHeight': 43.0, 'MinTop': 361.0}, {'LineText': 'Poblacion, Leon, Iloilo,', 'Words': [{'WordText': 'Poblacion', 'Left': 94.0, 'Top': 417.0, 'Height': 49.0, 'Width': 220.0}, {'WordText': ',', 'Left': 94.0, 'Top': 417.0, 'Height': 49.0, 'Width': 220.0}, {'WordText': 'Leon', 'Left': 321.0, 'Top': 417.0, 'Height': 49.0, 'Width': 122.0}, {'WordText': ',', 'Left': 321.0, 'Top': 417.0, 'Height': 49.0, 'Width': 122.0}, {'WordText': 'Iloilo', 'Left': 449.0, 'Top': 417.0, 'Height': 49.0, 'Width': 154.0}, {'WordText': ',', 'Left': 449.0, 'Top': 417.0, 'Height': 49.0, 'Width': 154.0}], 'MaxHeight': 49.0, 'MinTop': 417.0}, {'LineText': 'Philippines', 'Words': [{'WordText': 'Philippines', 'Left': 225.0, 'Top': 468.0, 'Height': 44.0, 'Width': 238.0}], 'MaxHeight': 44.0, 'MinTop': 468.0}, {'LineText': 'lel #: NULL', 'Words': [{'WordText': 'lel', 'Left': 221.0, 'Top': 524.0, 'Height': 40.0, 'Width': 78.0}, {'WordText': '#:', 'Left': 304.0, 'Top': 524.0, 'Height': 39.0, 'Width': 59.0}, {'WordText': 'NULL', 'Left': 368.0, 'Top': 523.0, 'Height': 40.0, 'Width': 89.0}], 
'MaxHeight': 41.0, 'MinTop': 523.0}, {'LineText': '05/01/2023 (Mon) 23:00:57', 'Words': [{'WordText': '05', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '/', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 215.0}, {'WordText': '01', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '/', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 215.0}, {'WordText': '2023', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '(', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': 'Mon', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': ')', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': '23', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': ':', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': '00', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': ':', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': '57', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}], 'MaxHeight': 42.0, 'MinTop': 622.0}, {'LineText': 'RCPT #2481347', 'Words': [{'WordText': 'RCPT', 'Left': 13.0, 'Top': 723.0, 'Height': 42.0, 'Width': 94.0}, {'WordText': '#', 'Left': 113.0, 'Top': 723.0, 'Height': 42.0, 'Width': 184.0}, {'WordText': '2481347', 'Left': 113.0, 'Top': 723.0, 'Height': 42.0, 'Width': 184.0}], 'MaxHeight': 42.0, 'MinTop': 723.0}, {'LineText': 'ROPT CNTHO', 'Words': [{'WordText': 'ROPT', 'Left': 472.0, 'Top': 723.0, 'Height': 49.0, 'Width': 96.0}, {'WordText': 'CNTHO', 'Left': 574.0, 'Top': 722.0, 'Height': 49.0, 'Width': 120.0}], 'MaxHeight': 50.0, 'MinTop': 722.0}, {'LineText': 'STORE#3058', 'Words': [{'WordText': 'STORE', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}, {'WordText': '#', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}, {'WordText': '3058', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}], 'MaxHeight': 47.0, 'MinTop': 771.0}, {'LineText': 'SN# :XTI43170', 'Words': [{'WordText': 'SN', 'Left': 433.0, 'Top': 771.0, 'Height': 49.0, 'Width': 66.0}, {'WordText': '#', 'Left': 433.0, 'Top': 771.0, 'Height': 49.0, 'Width': 66.0}, {'WordText': ':', 'Left': 505.0, 'Top': 771.0, 'Height': 50.0, 'Width': 189.0}, {'WordText': 'XTI43170', 'Left': 505.0, 'Top': 771.0, 'Height': 50.0, 'Width': 189.0}], 'MaxHeight': 50.0, 'MinTop': 771.0}, {'LineText': 'MIN #: 18112011091411051', 'Words': [{'WordText': 'MIN', 'Left': 13.0, 'Top': 830.0, 'Height': 39.0, 'Width': 73.0}, {'WordText': '#:', 'Left': 91.0, 'Top': 830.0, 'Height': 39.0, 'Width': 58.0}, {'WordText': '18112011091411051', 'Left': 154.0, 'Top': 830.0, 'Height': 39.0, 'Width': 360.0}], 'MaxHeight': 39.0, 'MinTop': 830.0}, {'LineText': 'STAFF: Angelica Duante', 'Words': [{'WordText': 'STAFF', 'Left': 13.0, 'Top': 879.0, 'Height': 43.0, 'Width': 124.0}, {'WordText': ':', 'Left': 13.0, 'Top': 879.0, 'Height': 43.0, 'Width': 124.0}, {'WordText': 'Angelica', 'Left': 142.0, 'Top': 879.0, 'Height': 43.0, 'Width': 177.0}, {'WordText': 'Duante', 'Left': 325.0, 'Top': 879.0, 'Height': 43.0, 'Width': 138.0}], 'MaxHeight': 43.0, 'MinTop': 879.0}, {'LineText': '7FKoreanßun', 'Words': [{'WordText': '7FKoreanßun', 'Left': 16.0, 'Top': 979.0, 'Height': 45.0, 'Width': 235.0}], 'MaxHeight': 45.0, 'MinTop': 979.0}, {'LineText': 'NissinYaSaBeet77g', 'Words': [{'WordText': 'NissinYaSaBeet77g', 'Left': 13.0, 'Top': 1032.0, 'Height': 42.0, 'Width': 365.0}], 'MaxHeight': 42.0, 
'MinTop': 1032.0}, {'LineText': 'BBHOTDOGCREMYCHEES', 'Words': [{'WordText': 'BBHOTDOGCREMYCHEES', 'Left': 13.0, 'Top': 1084.0, 'Height': 39.0, 'Width': 384.0}], 'MaxHeight': 39.0, 'MinTop': 1084.0}, {'LineText': '39.00 Х 6', 'Words': [{'WordText': '39.00', 'Left': 140.0, 'Top': 1136.0, 'Height': 43.0, 'Width': 116.0}, {'WordText': 'Х', 'Left': 261.0, 'Top': 1136.0, 'Height': 43.0, 'Width': 100.0}, {'WordText': '6', 'Left': 366.0, 'Top': 1135.0, 'Height': 42.0, 'Width': 29.0}], 'MaxHeight': 44.0, 'MinTop': 1135.0}, {'LineText': 'chocvron? in1Ch020g', 'Words': [{'WordText': 'chocvron', 'Left': 13.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 193.0}, {'WordText': '?', 'Left': 13.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 193.0}, {'WordText': 'in1Ch020g', 'Left': 212.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 186.0}], 'MaxHeight': 43.0, 'MinTop': 1185.0}, {'LineText': '15.00 X', 'Words': [{'WordText': '15.00', 'Left': 140.0, 'Top': 1240.0, 'Height': 43.0, 'Width': 116.0}, {'WordText': 'X', 'Left': 261.0, 'Top': 1240.0, 'Height': 42.0, 'Width': 42.0}], 'MaxHeight': 43.0, 'MinTop': 1240.0}, {'LineText': '2', 'Words': [{'WordText': '2', 'Left': 355.0, 'Top': 1240.0, 'Height': 39.0, 'Width': 39.0}], 'MaxHeight': 39.0, 'MinTop': 1240.0}, {'LineText': '55.004', 'Words': [{'WordText': '55.004', 'Left': 557.0, 'Top': 979.0, 'Height': 47.0, 'Width': 137.0}], 'MaxHeight': 47.0, 'MinTop': 979.0}, {'LineText': '40.000', 'Words': [{'WordText': '40.000', 'Left': 560.0, 'Top': 1031.0, 'Height': 48.0, 'Width': 134.0}], 'MaxHeight': 48.0, 'MinTop': 1031.0}, {'LineText': '234.000', 'Words': [{'WordText': '234.000', 'Left': 534.0, 'Top': 1135.0, 'Height': 47.0, 'Width': 160.0}], 'MaxHeight': 47.0, 'MinTop': 1135.0}, {'LineText': '30.000', 'Words': [{'WordText': '30.000', 'Left': 557.0, 'Top': 1237.0, 'Height': 46.0, 'Width': 137.0}], 'MaxHeight': 46.0, 'MinTop': 1237.0}, {'LineText': 'Total (10)', 'Words': [{'WordText': 'Total', 'Left': 13.0, 'Top': 1340.0, 'Height': 44.0, 'Width': 121.0}, {'WordText': '(', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}, {'WordText': '10', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}, {'WordText': ')', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}], 'MaxHeight': 46.0, 'MinTop': 1340.0}, {'LineText': 'CASH', 'Words': [{'WordText': 'CASH', 'Left': 55.0, 'Top': 1390.0, 'Height': 43.0, 'Width': 91.0}], 'MaxHeight': 43.0, 'MinTop': 1390.0}, {'LineText': 'CHANGE', 'Words': [{'WordText': 'CHANGE', 'Left': 52.0, 'Top': 1442.0, 'Height': 43.0, 'Width': 137.0}], 'MaxHeight': 43.0, 'MinTop': 1442.0}, {'LineText': '359.00', 'Words': [{'WordText': '359.00', 'Left': 557.0, 'Top': 1341.0, 'Height': 47.0, 'Width': 137.0}], 'MaxHeight': 47.0, 'MinTop': 1341.0}, {'LineText': '1000.00', 'Words': [{'WordText': '1000.00', 'Left': 537.0, 'Top': 1389.0, 'Height': 48.0, 'Width': 154.0}], 'MaxHeight': 48.0, 'MinTop': 1389.0}, {'LineText': '641.00', 'Words': [{'WordText': '641.00', 'Left': 557.0, 'Top': 1442.0, 'Height': 46.0, 'Width': 134.0}], 'MaxHeight': 46.0, 'MinTop': 1442.0}, {'LineText': 'VATable', 'Words': [{'WordText': 'VATable', 'Left': 52.0, 'Top': 1546.0, 'Height': 40.0, 'Width': 157.0}], 'MaxHeight': 40.0, 'MinTop': 1546.0}, {'LineText': 'VAT_Tax', 'Words': [{'WordText': 'VAT_Tax', 'Left': 52.0, 'Top': 1598.0, 'Height': 50.0, 'Width': 157.0}], 'MaxHeight': 50.0, 'MinTop': 1598.0}, {'LineText': 'Zero_Rated', 'Words': [{'WordText': 'Zero_Rated', 'Left': 52.0, 'Top': 1649.0, 'Height': 48.0, 'Width': 219.0}], 'MaxHeight': 48.0, 'MinTop': 1649.0}, 
{'LineText': 'VAT_Exempted', 'Words': [{'WordText': 'VAT_Exempted', 'Left': 52.0, 'Top': 1699.0, 'Height': 50.0, 'Width': 264.0}], 'MaxHeight': 50.0, 'MinTop': 1699.0}, {'LineText': '320.54', 'Words': [{'WordText': '320.54', 'Left': 557.0, 'Top': 1546.0, 'Height': 46.0, 'Width': 134.0}], 'MaxHeight': 46.0, 'MinTop': 1546.0}, {'LineText': '38.46', 'Words': [{'WordText': '38.46', 'Left': 577.0, 'Top': 1598.0, 'Height': 43.0, 'Width': 114.0}], 'MaxHeight': 43.0, 'MinTop': 1598.0}, {'LineText': '0.00', 'Words': [{'WordText': '0.00', 'Left': 600.0, 'Top': 1651.0, 'Height': 42.0, 'Width': 91.0}], 'MaxHeight': 42.0, 'MinTop': 1651.0}, {'LineText': '0.00', 'Words': [{'WordText': '0.00', 'Left': 599.0, 'Top': 1702.0, 'Height': 43.0, 'Width': 95.0}], 'MaxHeight': 43.0, 'MinTop': 1702.0}, {'LineText': 'Sold To: 9906087698684', 'Words': [{'WordText': 'Sold', 'Left': 13.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 94.0}, {'WordText': 'To', 'Left': 113.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 79.0}, {'WordText': ':', 'Left': 113.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 79.0}, {'WordText': '9906087698684', 'Left': 197.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 285.0}], 'MaxHeight': 42.0, 'MinTop': 1803.0}, {'LineText': 'Name:', 'Words': [{'WordText': 'Name', 'Left': 10.0, 'Top': 1856.0, 'Height': 39.0, 'Width': 111.0}, {'WordText': ':', 'Left': 10.0, 'Top': 1856.0, 'Height': 39.0, 'Width': 111.0}], 'MaxHeight': 39.0, 'MinTop': 1856.0}, {'LineText': 'Address:', 'Words': [{'WordText': 'Address', 'Left': 13.0, 'Top': 1907.0, 'Height': 40.0, 'Width': 170.0}, {'WordText': ':', 'Left': 13.0, 'Top': 1907.0, 'Height': 40.0, 'Width': 170.0}], 'MaxHeight': 40.0, 'MinTop': 1907.0}, {'LineText': 'TIN:', 'Words': [{'WordText': 'TIN', 'Left': 13.0, 'Top': 1957.0, 'Height': 39.0, 'Width': 85.0}, {'WordText': ':', 'Left': 13.0, 'Top': 1957.0, 'Height': 39.0, 'Width': 85.0}], 'MaxHeight': 39.0, 'MinTop': 1957.0}, {'LineText': 'Philippine Seven Corporation', 'Words': [{'WordText': 'Philippine', 'Left': 10.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 226.0}, {'WordText': 'Seven', 'Left': 241.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 118.0}, {'WordText': 'Corporation', 'Left': 365.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 241.0}], 'MaxHeight': 43.0, 'MinTop': 2060.0}, {'LineText': '7th Floor The Columbia Tower', 'Words': [{'WordText': '7th', 'Left': 13.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 72.0}, {'WordText': 'Floor', 'Left': 90.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 126.0}, {'WordText': 'The', 'Left': 220.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 76.0}, {'WordText': 'Columbia', 'Left': 301.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 189.0}, {'WordText': 'Tower', 'Left': 495.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 108.0}], 'MaxHeight': 36.0, 'MinTop': 2116.0}, {'LineText': 'Ortigas Avenue, Mandaluyong', 'Words': [{'WordText': 'Ortigas', 'Left': 33.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 154.0}, {'WordText': 'Avenue', 'Left': 192.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 159.0}, {'WordText': ',', 'Left': 192.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 159.0}, {'WordText': 'Mandaluyong', 'Left': 358.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 248.0}], 'MaxHeight': 49.0, 'MinTop': 2161.0}, {'LineText': 'City', 'Words': [{'WordText': 'City', 'Left': 29.0, 'Top': 2214.0, 'Height': 42.0, 'Width': 94.0}], 'MaxHeight': 42.0, 'MinTop': 2214.0}, {'LineText': 'TIN: 000-390-189-000', 'Words': [{'WordText': 'TIN', 'Left': 13.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 90.0}, {'WordText': ':', 'Left': 13.0, 'Top': 2266.0, 
'Height': 46.0, 'Width': 90.0}, {'WordText': '000', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '390', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '189', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '000', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}], 'MaxHeight': 46.0, 'MinTop': 2266.0}, {'LineText': 'BIR ACCI #', 'Words': [{'WordText': 'BIR', 'Left': 10.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'ACCI', 'Left': 93.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 98.0}, {'WordText': '#', 'Left': 195.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 30.0}], 'MaxHeight': 40.0, 'MinTop': 2318.0}, {'LineText': '116-000390189-000346 19602', 'Words': [{'WordText': '116', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '000390189', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '000346', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '19602', 'Left': 468.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 118.0}], 'MaxHeight': 43.0, 'MinTop': 2366.0}, {'LineText': 'AcciDate: 08/01/2020', 'Words': [{'WordText': 'AcciDate', 'Left': 13.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 194.0}, {'WordText': ':', 'Left': 13.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 194.0}, {'WordText': '08', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '/', 'Left': 213.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '01', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '/', 'Left': 213.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '2020', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}], 'MaxHeight': 42.0, 'MinTop': 2419.0}, {'LineText': '07/31/2025', 'Words': [{'WordText': '07', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '/', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '31', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '/', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '2025', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}], 'MaxHeight': 44.0, 'MinTop': 2470.0}, {'LineText': 'Permit #:', 'Words': [{'WordText': 'Permit', 'Left': 10.0, 'Top': 2526.0, 'Height': 40.0, 'Width': 142.0}, {'WordText': '#:', 'Left': 156.0, 'Top': 2527.0, 'Height': 39.0, 'Width': 46.0}], 'MaxHeight': 40.0, 'MinTop': 2526.0}, {'LineText': 'FP112018-074-0194656-00002', 'Words': [{'WordText': 'FP112018', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '074', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '0194656', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '00002', 'Left': 33.0, 'Top': 2572.0, 'Height': 
39.0, 'Width': 554.0}], 'MaxHeight': 39.0, 'MinTop': 2572.0}, {'LineText': 'Get a chance to win a trip for', 'Words': [{'WordText': 'Get', 'Left': 29.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'a', 'Left': 112.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'chance', 'Left': 151.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 146.0}, {'WordText': 'to', 'Left': 302.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 54.0}, {'WordText': 'win', 'Left': 361.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 83.0}, {'WordText': 'a', 'Left': 448.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'trip', 'Left': 487.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 107.0}, {'WordText': 'for', 'Left': 599.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 69.0}], 'MaxHeight': 39.0, 'MinTop': 2679.0}, {'LineText': '2 to Korea when you buy PISO', 'Words': [{'WordText': '2', 'Left': 52.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'to', 'Left': 91.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 58.0}, {'WordText': 'Korea', 'Left': 154.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 122.0}, {'WordText': 'when', 'Left': 281.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 102.0}, {'WordText': 'you', 'Left': 388.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'buy', 'Left': 471.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'PISO', 'Left': 554.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 91.0}], 'MaxHeight': 39.0, 'MinTop': 2731.0}, {'LineText': 'worth of 7-Eleven items. Earn', 'Words': [{'WordText': 'worth', 'Left': 29.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 116.0}, {'WordText': 'of', 'Left': 152.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 61.0}, {'WordText': '7', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': '-', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': 'Eleven', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': 'items', 'Left': 409.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 135.0}, {'WordText': '.', 'Left': 409.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 135.0}, {'WordText': 'Earn', 'Left': 550.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 99.0}], 'MaxHeight': 49.0, 'MinTop': 2783.0}, {'LineText': '3 eRaffle entries when you buy', 'Words': [{'WordText': '3', 'Left': 29.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'eRaffle', 'Left': 68.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 166.0}, {'WordText': 'entries', 'Left': 239.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 161.0}, {'WordText': 'when', 'Left': 404.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 102.0}, {'WordText': 'you', 'Left': 512.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'buy', 'Left': 594.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 70.0}], 'MaxHeight': 39.0, 'MinTop': 2832.0}, {'LineText': 'discounted booster Items. 
Per', 'Words': [{'WordText': 'discounted', 'Left': 33.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 221.0}, {'WordText': 'booster', 'Left': 259.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 168.0}, {'WordText': 'Items', 'Left': 432.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 137.0}, {'WordText': '.', 'Left': 432.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 137.0}, {'WordText': 'Per', 'Left': 574.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 68.0}], 'MaxHeight': 42.0, 'MinTop': 2888.0}, {'LineText': 'DTI FAIR TRADE Permit Number:', 'Words': [{'WordText': 'DTI', 'Left': 29.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 78.0}, {'WordText': 'FAIR', 'Left': 113.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 100.0}, {'WordText': 'TRADE', 'Left': 218.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 121.0}, {'WordText': 'Permit', 'Left': 344.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 142.0}, {'WordText': 'Number', 'Left': 491.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 151.0}, {'WordText': ':', 'Left': 491.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 151.0}], 'MaxHeight': 42.0, 'MinTop': 2933.0}, {'LineText': '163019 Series of 2023..', 'Words': [{'WordText': '163019', 'Left': 117.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 135.0}, {'WordText': 'Series', 'Left': 257.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 145.0}, {'WordText': 'of', 'Left': 407.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 55.0}, {'WordText': '2023', 'Left': 467.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 165.0}, {'WordText': '..', 'Left': 467.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 165.0}], 'MaxHeight': 40.0, 'MinTop': 2988.0}, {'LineText': 'facebook.com/711philippines.', 'Words': [{'WordText': 'facebook.com', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '/', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '711philippines', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '.', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}], 'MaxHeight': 42.0, 'MinTop': 3037.0}, {'LineText': '- THIS IS AN OFFICIAL RECEIPT -', 'Words': [{'WordText': '-', 'Left': 0.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 46.0}, {'WordText': 'THIS', 'Left': 52.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 98.0}, {'WordText': 'IS', 'Left': 155.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 57.0}, {'WordText': 'AN', 'Left': 219.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 52.0}, {'WordText': 'OFFICIAL', 'Left': 276.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 184.0}, {'WordText': 'RECEIPT', 'Left': 466.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 167.0}, {'WordText': '-', 'Left': 638.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 30.0}], 'MaxHeight': 46.0, 'MinTop': 3138.0}], 'HasOverlay': True}, 'TextOrientation': '0', 'FileParseExitCode': 1, 'ParsedText': '7-ELEVEN.\\nNHJ Convenience Store\\nOwned & Operated by: Nancy A.\\nClimacosa\\nVATREGTIN #933-598-685-002\\nPoblacion, Leon, Iloilo,\\nPhilippines\\nlel #: NULL\\n05/01/2023 (Mon) 23:00:57\\nRCPT #2481347\\nROPT CNTHO\\nSTORE#3058\\nSN# :XTI43170\\nMIN #: 18112011091411051\\nSTAFF: Angelica Duante\\n7FKoreanßun\\nNissinYaSaBeet77g\\nBBHOTDOGCREMYCHEES\\n39.00 Х 6\\nchocvron? 
in1Ch020g\\n15.00 X\\n2\\n55.004\\n40.000\\n234.000\\n30.000\\nTotal (10)\\nCASH\\nCHANGE\\n359.00\\n1000.00\\n641.00\\nVATable\\nVAT_Tax\\nZero_Rated\\nVAT_Exempted\\n320.54\\n38.46\\n0.00\\n0.00\\nSold To: 9906087698684\\nName:\\nAddress:\\nTIN:\\nPhilippine Seven Corporation\\n7th Floor The Columbia Tower\\nOrtigas Avenue, Mandaluyong\\nCity\\nTIN: 000-390-189-000\\nBIR ACCI #\\n116-000390189-000346 19602\\nAcciDate: 08/01/2020\\n07/31/2025\\nPermit #:\\nFP112018-074-0194656-00002\\nGet a chance to win a trip for\\n2 to Korea when you buy PISO\\nworth of 7-Eleven items. Earn\\n3 eRaffle entries when you buy\\ndiscounted booster Items. Per\\nDTI FAIR TRADE Permit Number:\\n163019 Series of 2023..\\nfacebook.com/711philippines.\\n- THIS IS AN OFFICIAL RECEIPT -', 'ErrorMessage': '', 'ErrorDetails': ''}], 'OCRExitCode': 1, 'IsErroredOnProcessing': False, 'ProcessingTimeInMilliseconds': '2593', 'SearchablePDFURL': 'Searchable PDF not generated as it was not requested.'}\n"
1243
- ]
1244
- }
1245
- ],
1246
- "source": [
1247
- "# Import requests library\n",
1248
- "import requests\n",
1249
- "\n",
1250
- "# Define the OCR API endpoint\n",
1251
- "url = \"https://api.ocr.space/parse/image\"\n",
1252
- "\n",
1253
- "# Define the API key and the language\n",
1254
- "api_key = \"K88232854988957\"\n",
1255
- "language = \"eng\"\n",
1256
- "\n",
1257
- "# Define the image file path\n",
1258
- "image_file = r\"C:\\Users\\Ayoo\\Desktop\\webapp\\predictions\\imgs\\20230508_122035.jpg\"\n",
1259
- "\n",
1260
- "# Open the image file as binary\n",
1261
- "with open(image_file, \"rb\") as f:\n",
1262
- " # Define the payload for the API request\n",
1263
- " payload = {\n",
1264
- " \"apikey\": api_key,\n",
1265
- " \"language\": language,\n",
1266
- " \"isOverlayRequired\": True, # Optional, set to True if you want the coordinates of the words\n",
1267
- " \"OCREngine\": 2 # OCR Engine 2 for Layoutlmv3\n",
1268
- " }\n",
1269
- " # Define the file parameter for the API request\n",
1270
- " file = {\n",
1271
- " \"file\": f\n",
1272
- " }\n",
1273
- " # Send the POST request to the OCR API\n",
1274
- " response = requests.post(url, data=payload, files=file)\n",
1275
- "\n",
1276
- "# Check the status code of the response\n",
1277
- "if response.status_code == 200:\n",
1278
- " # Parse the JSON response\n",
1279
- " result = response.json()\n",
1280
- " # Print the parsed text\n",
1281
- " print(result)\n",
1282
- "else:\n",
1283
- " # Print the error message\n",
1284
- " print(\"Error: \" + response.text)\n"
1285
- ]
1286
- },
1287
- {
1288
- "cell_type": "code",
1289
- "execution_count": 13,
1290
- "metadata": {},
1291
- "outputs": [
1292
- {
1293
- "ename": "TypeError",
1294
- "evalue": "Object of type Response is not JSON serializable",
1295
- "output_type": "error",
1296
- "traceback": [
1297
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
1298
- "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
1299
- "Cell \u001b[1;32mIn[13], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Assuming 'response' is the JSON response from the OCR API\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m)\u001b[49m)\n",
1300
- "File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\__init__.py:238\u001b[0m, in \u001b[0;36mdumps\u001b[1;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 233\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONEncoder\n\u001b[0;32m 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43mskipkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_ascii\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_ascii\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_circular\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_circular\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_nan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nan\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mseparators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseparators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msort_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m--> 238\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n",
1301
- "File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:202\u001b[0m, in \u001b[0;36mJSONEncoder.encode\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 200\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miterencode(o, _one_shot\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(chunks, (\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m)):\n\u001b[1;32m--> 202\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(chunks)\n\u001b[0;32m 203\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(chunks)\n",
1302
- "File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:439\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCircular reference detected\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 438\u001b[0m markers[markerid] \u001b[38;5;241m=\u001b[39m o\n\u001b[1;32m--> 439\u001b[0m o \u001b[38;5;241m=\u001b[39m \u001b[43m_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 440\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode(o, _current_indent_level)\n\u001b[0;32m 441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
1303
- "File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:180\u001b[0m, in \u001b[0;36mJSONEncoder.default\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdefault\u001b[39m(\u001b[38;5;28mself\u001b[39m, o):\n\u001b[0;32m 162\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Implement this method in a subclass such that it returns\u001b[39;00m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;124;03m a serializable object for ``o``, or calls the base implementation\u001b[39;00m\n\u001b[0;32m 164\u001b[0m \u001b[38;5;124;03m (to raise a ``TypeError``).\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 178\u001b[0m \n\u001b[0;32m 179\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 180\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mObject of type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mo\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mis not JSON serializable\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
1304
- "\u001b[1;31mTypeError\u001b[0m: Object of type Response is not JSON serializable"
1305
- ]
1306
- }
1307
- ],
1308
- "source": [
1309
- "import json\n",
1310
- "\n",
1311
- "# Assuming 'response' is the JSON response from the OCR API\n",
1312
- "print(json.dumps(response, indent=4))\n"
1313
- ]
1314
- }
1315
- ],
1316
- "metadata": {
1317
- "kernelspec": {
1318
- "display_name": "mlenv",
1319
- "language": "python",
1320
- "name": "python3"
1321
- },
1322
- "language_info": {
1323
- "codemirror_mode": {
1324
- "name": "ipython",
1325
- "version": 3
1326
- },
1327
- "file_extension": ".py",
1328
- "mimetype": "text/x-python",
1329
- "name": "python",
1330
- "nbconvert_exporter": "python",
1331
- "pygments_lexer": "ipython3",
1332
- "version": "3.11.5"
1333
- }
1334
- },
1335
- "nbformat": 4,
1336
- "nbformat_minor": 2
1337
- }
 
 
 
inferenced/csv_files/Output_0.csv DELETED
@@ -1,4 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,C26rnTeaLemon500ml,39.000,88.00,9.43
3
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,COBRENRGYORNK350ML,28.000,88.00,9.43
4
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,OTSHIBMPFRSPLNS50G,21.000,88.00,9.43
 
 
 
 
 
inferenced/csv_files/Output_1.csv DELETED
@@ -1,2 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- 01053710,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,08 / 12 / 2023,10 : 07,PharmtonEsentialCao,23.75,23 - 75,
 
 
 
inferenced/csv_files/Output_2.csv DELETED
@@ -1,3 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- # 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,NESTEALEMICET500ML,35.000,76.00,8.14
3
- # 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,ArlaGStrwbryT200ml,41.000,76.00,8.14
 
 
 
 
inferenced/csv_files/Output_3.csv DELETED
@@ -1,2 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- 000036410,WVSU Multi Purpose Cooperative,Luna Street Lapaz Iloilo City,10 - 25 - 2023,01 : 29 : 49 PM,COKE,13.00,13.00,1.39
 
 
 
inferenced/csv_files/Output_4.csv DELETED
@@ -1,2 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- 01053735,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,09 / 12 / 2023,11 : 07,EQDryTravelM18,3.31.00,331 - 00,35.46
 
 
 
inferenced/output.csv DELETED
@@ -1,9 +0,0 @@
1
- RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
2
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,C26rnTeaLemon500ml,39.000,88.00,9.43
3
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,COBRENRGYORNK350ML,28.000,88.00,9.43
4
- # 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,OTSHIBMPFRSPLNS50G,21.000,88.00,9.43
5
- 01053710,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,08 / 12 / 2023,10 : 07,PharmtonEsentialCao,23.75,23 - 75,
6
- # 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,NESTEALEMICET500ML,35.000,76.00,8.14
7
- # 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,ArlaGStrwbryT200ml,41.000,76.00,8.14
8
- 000036410,WVSU Multi Purpose Cooperative,Luna Street Lapaz Iloilo City,10 - 25 - 2023,01 : 29 : 49 PM,COKE,13.00,13.00,1.39
9
- 01053735,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,09 / 12 / 2023,11 : 07,EQDryTravelM18,3.31.00,331 - 00,35.46
 
 
 
 
 
 
 
 
 
 
inferenced/sample1_711_inference.jpg DELETED
Binary file (295 kB)
 
inferenced/sample1_grace_inference.jpg DELETED
Binary file (186 kB)
 
inferenced/sample_711_inference.jpg DELETED
Binary file (298 kB)
 
inferenced/sample_coop_inference.jpg DELETED
Binary file (276 kB)
 
inferenced/sample_grace_inference.jpg DELETED
Binary file (205 kB)
 
log/error_output.log CHANGED
@@ -308,3 +308,21 @@ Traceback (most recent call last):
308
  TypeError: The view function for 'create_csv' did not return a valid response. The function either returned None or ended without a return statement.
309
  2024-02-22 10:18:01,539 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 10:18:01] "GET /create_csv HTTP/1.1" 500 -
310
  2024-02-22 10:18:02,099 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 10:18:02] "GET /get_data HTTP/1.1" 404 -
311
+ 2024-02-22 17:02:51,698 ERROR app 'NoneType' object is not iterable
312
+ 2024-02-22 17:02:51,706 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:51] "GET /run_inference HTTP/1.1" 302 -
313
+ 2024-02-22 17:02:51,754 ERROR app Exception on /create_csv [GET]
314
+ Traceback (most recent call last):
315
+ File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 2190, in wsgi_app
316
+ response = self.full_dispatch_request()
317
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
318
+ File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1487, in full_dispatch_request
319
+ return self.finalize_request(rv)
320
+ ^^^^^^^^^^^^^^^^^^^^^^^^^
321
+ File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1506, in finalize_request
322
+ response = self.make_response(rv)
323
+ ^^^^^^^^^^^^^^^^^^^^^^
324
+ File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1801, in make_response
325
+ raise TypeError(
326
+ TypeError: The view function for 'create_csv' did not return a valid response. The function either returned None or ended without a return statement.
327
+ 2024-02-22 17:02:51,766 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:51] "GET /create_csv HTTP/1.1" 500 -
328
+ 2024-02-22 17:02:52,348 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:52] "GET /get_data HTTP/1.1" 404 -
static/inference/Layoutlmv3_inference/__init__.py DELETED
File without changes
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (176 Bytes)
 
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (195 Bytes)
 
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (180 Bytes)
 
static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-310.pyc DELETED
Binary file (2.04 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc DELETED
Binary file (3.87 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-310.pyc DELETED
Binary file (6.83 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc DELETED
Binary file (13.5 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-310.pyc DELETED
Binary file (3.51 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-311.pyc DELETED
Binary file (9.92 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-312.pyc DELETED
Binary file (5.24 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-310.pyc DELETED
Binary file (2.41 kB)
 
static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-311.pyc DELETED
Binary file (3.84 kB)
 
static/inference/Layoutlmv3_inference/annotate_image.py DELETED
@@ -1,56 +0,0 @@
1
- import os
2
- from PIL import Image, ImageDraw, ImageFont
3
- from .utils import image_label_2_color
4
-
5
-
6
- def get_flattened_output(docs):
7
- print("Running Flattened Output")
8
- flattened_output = []
9
- annotation_key = 'output'
10
- for doc in docs:
11
- flattened_output_item = {annotation_key: []}
12
- doc_annotation = doc[annotation_key]
13
- for i, span in enumerate(doc_annotation):
14
- if len(span['words']) > 1:
15
- for span_chunk in span['words']:
16
- flattened_output_item[annotation_key].append(
17
- {
18
- 'label': span['label'],
19
- 'text': span_chunk['text'],
20
- 'words': [span_chunk]
21
- }
22
- )
23
-
24
- else:
25
- flattened_output_item[annotation_key].append(span)
26
- flattened_output.append(flattened_output_item)
27
- return flattened_output
28
-
29
-
30
- def annotate_image(image_path, annotation_object):
31
- print("Annotating Images")
32
- img = None
33
- image = Image.open(image_path).convert('RGBA')
34
- tmp = image.copy()
35
- label2color = image_label_2_color(annotation_object)
36
- overlay = Image.new('RGBA', tmp.size, (0, 0, 0)+(0,))
37
- draw = ImageDraw.Draw(overlay)
38
- font = ImageFont.load_default()
39
-
40
- predictions = [span['label'] for span in annotation_object['output']]
41
- boxes = [span['words'][0]['box'] for span in annotation_object['output']]
42
- for prediction, box in zip(predictions, boxes):
43
- draw.rectangle(box, outline=label2color[prediction],
44
- width=3, fill=label2color[prediction]+(int(255*0.33),))
45
- draw.text((box[0] + 10, box[1] - 10), text=prediction,
46
- fill=label2color[prediction], font=font)
47
-
48
- img = Image.alpha_composite(tmp, overlay)
49
- img = img.convert("RGB")
50
-
51
- image_name = os.path.basename(image_path)
52
- image_name = image_name[:image_name.find('.')]
53
- output_folder = 'inferenced/'
54
- os.makedirs(output_folder, exist_ok=True)
55
-
56
- img.save(os.path.join(output_folder, f'{image_name}_inference.jpg'))
 
 
 
static/inference/Layoutlmv3_inference/inference_handler.py DELETED
@@ -1,199 +0,0 @@
1
- from .utils import load_model,load_processor,normalize_box,compare_boxes,adjacent
2
- from .annotate_image import get_flattened_output,annotate_image
3
- from PIL import Image,ImageDraw, ImageFont
4
- import logging
5
- import torch
6
- import json
7
- import os
8
-
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
- class ModelHandler(object):
13
- def __init__(self):
14
- self.model = None
15
- self.model_dir = None
16
- self.device = 'cpu'
17
- self.error = None
18
- self.initialized = False
19
- self._raw_input_data = None
20
- self._processed_data = None
21
- self._images_size = None
22
-
23
- def initialize(self, context):
24
- try:
25
- logger.info("Loading transformer model")
26
- self._context = context
27
- properties = self._context
28
- self.model_dir = properties.get("model_dir")
29
- self.model = self.load(self.model_dir)
30
- self.initialized = True
31
- except Exception as e:
32
- logger.error(f"Error initializing model: {str(e)}")
33
- self.error = str(e)
34
-
35
- def preprocess(self, batch):
36
- try:
37
- inference_dict = batch
38
- self._raw_input_data = inference_dict
39
- processor = load_processor()
40
- images = [Image.open(path).convert("RGB")
41
- for path in inference_dict['image_path']]
42
- self._images_size = [img.size for img in images]
43
- words = inference_dict['words']
44
- boxes = [[normalize_box(box, images[i].size[0], images[i].size[1])
45
- for box in doc] for i, doc in enumerate(inference_dict['bboxes'])]
46
- encoded_inputs = processor(
47
- images, words, boxes=boxes, return_tensors="pt", padding="max_length", truncation=True)
48
- self._processed_data = encoded_inputs
49
- return encoded_inputs
50
- except Exception as e:
51
- logger.error(f"Error in preprocessing: {str(e)}")
52
- self.error = str(e)
53
- return None
54
-
55
- def load(self, model_dir):
56
- try:
57
- model = load_model(model_dir)
58
- return model
59
- except Exception as e:
60
- logger.error(f"Error loading LayoutLMv3 model: {str(e)}")
61
- self.error = str(e)
62
- return None
63
-
64
- def inference(self, model_input):
65
- try:
66
- with torch.no_grad():
67
- inference_outputs = self.model(**model_input)
68
- predictions = inference_outputs.logits.argmax(-1).tolist()
69
- results = []
70
- for i in range(len(predictions)):
71
- tmp = dict()
72
- tmp[f'output_{i}'] = predictions[i]
73
- results.append(tmp)
74
- return [results]
75
- except Exception as e:
76
- logger.error(f"Error in inference: {str(e)}")
77
- self.error = str(e)
78
- return None
79
-
80
- def postprocess(self, inference_output):
81
- try:
82
- docs = []
83
- k = 0
84
- for page, doc_words in enumerate(self._raw_input_data['words']):
85
- doc_list = []
86
- width, height = self._images_size[page]
87
- for i, doc_word in enumerate(doc_words, start=0):
88
- word_tagging = None
89
- word_labels = []
90
- word = dict()
91
- word['id'] = k
92
- k += 1
93
- word['text'] = doc_word
94
- word['pageNum'] = page + 1
95
- word['box'] = self._raw_input_data['bboxes'][page][i]
96
- _normalized_box = normalize_box(
97
- self._raw_input_data['bboxes'][page][i], width, height)
98
- for j, box in enumerate(self._processed_data['bbox'].tolist()[page]):
99
- if compare_boxes(box, _normalized_box):
100
- if self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]] != 'O':
101
- word_labels.append(
102
- self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]][2:])
103
- else:
104
- word_labels.append('other')
105
- if word_labels != []:
106
- word_tagging = word_labels[0] if word_labels[0] != 'other' else word_labels[-1]
107
- else:
108
- word_tagging = 'other'
109
- word['label'] = word_tagging
110
- word['pageSize'] = {'width': width, 'height': height}
111
- if word['label'] != 'other':
112
- doc_list.append(word)
113
- spans = []
114
- def adjacents(entity): return [
115
- adj for adj in doc_list if adjacent(entity, adj)]
116
- output_test_tmp = doc_list[:]
117
- for entity in doc_list:
118
- if adjacents(entity) == []:
119
- spans.append([entity])
120
- output_test_tmp.remove(entity)
121
-
122
- while output_test_tmp != []:
123
- span = [output_test_tmp[0]]
124
- output_test_tmp = output_test_tmp[1:]
125
- while output_test_tmp != [] and adjacent(span[-1], output_test_tmp[0]):
126
- span.append(output_test_tmp[0])
127
- output_test_tmp.remove(output_test_tmp[0])
128
- spans.append(span)
129
-
130
- output_spans = []
131
- for span in spans:
132
- if len(span) == 1:
133
- output_span = {"text": span[0]['text'],
134
- "label": span[0]['label'],
135
- "words": [{
136
- 'id': span[0]['id'],
137
- 'box': span[0]['box'],
138
- 'text': span[0]['text']
139
- }],
140
- }
141
- else:
142
- output_span = {"text": ' '.join([entity['text'] for entity in span]),
143
- "label": span[0]['label'],
144
- "words": [{
145
- 'id': entity['id'],
146
- 'box': entity['box'],
147
- 'text': entity['text']
148
- } for entity in span]
149
-
150
- }
151
- output_spans.append(output_span)
152
- docs.append({f'output': output_spans})
153
- return [json.dumps(docs, ensure_ascii=False)]
154
-
155
- except Exception as e:
156
- logger.error(f"Error in postprocessing: {str(e)}")
157
- self.error = str(e)
158
- return None
159
-
160
-
161
- def handle(self, data, context):
162
- try:
163
- if not self.initialized:
164
- self.initialize(context)
165
-
166
- if data is None:
167
- return None
168
-
169
- model_input = self.preprocess(data)
170
- if model_input is None:
171
- return None
172
-
173
- model_out = self.inference(model_input)
174
- if model_out is None:
175
- return None
176
-
177
- inference_out = self.postprocess(model_out)[0]
178
- with open('temp/LayoutlMV3InferenceOutput.json', 'w') as inf_out:
179
- inf_out.write(inference_out)
180
- inference_out_list = json.loads(inference_out)
181
- flattened_output_list = get_flattened_output(inference_out_list)
182
- print('Ready for Annotation')
183
- for i, flattened_output in enumerate(flattened_output_list):
184
- annotate_image(data['image_path'][i], flattened_output)
185
- except Exception as e:
186
- logger.error(f"Error handling request: {str(e)}")
187
- self.error = str(e)
188
-
189
- _service = ModelHandler()
190
-
191
-
192
- def handle(data, context):
193
- if not _service.initialized:
194
- _service.initialize(context)
195
-
196
- if data is None:
197
- return None
198
-
199
- return _service.handle(data, context)
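The core of postprocess() above is the adjacency walk that merges consecutive same-label words into spans. A standalone sketch of that idea, simplified to a single greedy pass over toy data (not the module's API, just the grouping logic):

# Toy stand-in for doc_list: words that share a label and have consecutive ids belong together.
def adjacent(w1, w2):
    return w1['label'] == w2['label'] and abs(w1['id'] - w2['id']) == 1

words = [{'id': 0, 'label': 'TOTAL', 'text': 'TOTAL'},
         {'id': 1, 'label': 'TOTAL', 'text': '9.99'},
         {'id': 3, 'label': 'DATE',  'text': '2024-01-01'}]

spans, current = [], [words[0]]
for w in words[1:]:
    if adjacent(current[-1], w):
        current.append(w)           # extend the running span
    else:
        spans.append(current)       # close it and start a new one
        current = [w]
spans.append(current)

print([' '.join(w['text'] for w in s) for s in spans])   # ['TOTAL 9.99', '2024-01-01']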
 
static/inference/Layoutlmv3_inference/ocr.py DELETED
@@ -1,187 +0,0 @@
1
- import os
2
- import pandas as pd
3
- import cv2
4
- import numpy as np
5
- import json
6
- import requests
7
- import traceback
8
-
9
- from PIL import Image
10
-
11
- def preprocess_image(image_path, max_file_size_mb=1, target_file_size_mb=0.5):
12
- try:
13
- # Check file size
14
- file_size_mb = os.path.getsize(image_path) / (1024 * 1024) # Convert to megabytes
15
- if file_size_mb > max_file_size_mb:
16
- print(f"File size ({file_size_mb} MB) exceeds the maximum allowed size ({max_file_size_mb} MB). Resizing the image.")
17
-
18
- # Read the image
19
- image = cv2.imread(image_path)
20
-
21
- # Calculate the new dimensions to achieve the target file size
22
- ratio = target_file_size_mb / file_size_mb
23
- new_width = int(image.shape[1] * np.sqrt(ratio))
24
- new_height = int(image.shape[0] * np.sqrt(ratio))
25
-
26
- # Enhance text
27
- enhanced_img = enhance_txt(image)
28
-
29
- # Resize the image
30
- enhanced = cv2.resize(enhanced_img, (new_width, new_height))
31
-
32
- return enhanced
33
-
34
- else:
35
- # If the file size is within the limit, proceed with the regular enhancement
36
- image = cv2.imread(image_path)
37
- enhanced = enhance_txt(image)
38
- return enhanced
39
-
40
- except Exception as e:
41
- print(f"An error occurred: {str(e)}")
42
- return None
43
-
44
-
45
- def enhance_txt(img, intensity_increase=20, bilateral_filter_diameter=9, bilateral_filter_sigma_color=75, bilateral_filter_sigma_space=75):
46
- # Get the width and height of the image
47
- w = img.shape[1]
48
- h = img.shape[0]
49
- w1 = int(w * 0.05)
50
- w2 = int(w * 0.95)
51
- h1 = int(h * 0.05)
52
- h2 = int(h * 0.95)
53
- ROI = img[h1:h2, w1:w2] # 95% of the center of the image
54
- threshold = np.mean(ROI) * 0.88 # % of average brightness
55
-
56
- blurred = cv2.GaussianBlur(img, (1, 1), 0)
57
- edged = 255 - cv2.Canny(blurred, 100, 150, apertureSize=7)
58
-
59
- # Increase intensity by adding a constant value
60
- img = np.clip(img + intensity_increase, 0, 255).astype(np.uint8)
61
-
62
- # Apply bilateral filter to reduce noise
63
- img = cv2.bilateralFilter(img, bilateral_filter_diameter, bilateral_filter_sigma_color, bilateral_filter_sigma_space)
64
-
65
- _, binary = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)
66
- return binary
67
-
68
-
69
- def run_tesseract_on_preprocessed_image(preprocessed_image, image_path):
70
- try:
71
- image_name = os.path.basename(image_path)
72
- image_name = image_name[:image_name.find('.')]
73
-
74
- # Create the "temp" folder if it doesn't exist
75
- temp_folder = "temp"
76
- if not os.path.exists(temp_folder):
77
- os.makedirs(temp_folder)
78
-
79
- # Define the OCR API endpoint
80
- url = "https://api.ocr.space/parse/image"
81
-
82
- # Define the API key and the language
83
- api_key = "K88232854988957" # Replace with your actual OCR Space API key
84
- language = "eng"
85
-
86
- # Save the preprocessed image
87
- cv2.imwrite(os.path.join(temp_folder, f"{image_name}_preprocessed.jpg"), preprocessed_image)
88
-
89
- # Open the preprocessed image file as binary
90
- with open(os.path.join(temp_folder, f"{image_name}_preprocessed.jpg"), "rb") as f:
91
- # Define the payload for the API request
92
- payload = {
93
- "apikey": api_key,
94
- "language": language,
95
- "isOverlayRequired": True,
96
- "OCREngine": 2
97
- }
98
- # Define the file parameter for the API request
99
- file = {
100
- "file": f
101
- }
102
- # Send the POST request to the OCR API
103
- response = requests.post(url, data=payload, files=file)
104
-
105
- # Check the status code of the response
106
- if response.status_code == 200:
107
- # Parse the JSON response
108
- result = response.json()
109
- print("---JSON file saved")
110
- # Save the OCR result as JSON
111
- with open(os.path.join(temp_folder, f"{image_name}_ocr.json"), 'w') as f:
112
- json.dump(result, f)
113
-
114
- return os.path.join(temp_folder, f"{image_name}_ocr.json")
115
- else:
116
- # Print the error message
117
- print("Error: " + response.text)
118
- return None
119
-
120
- except Exception as e:
121
- print(f"An error occurred during OCR request: {str(e)}")
122
- return None
123
-
124
- def clean_tesseract_output(json_output_path):
125
- try:
126
- with open(json_output_path, 'r') as json_file:
127
- data = json.load(json_file)
128
-
129
- lines = data['ParsedResults'][0]['TextOverlay']['Lines']
130
-
131
- words = []
132
- for line in lines:
133
- for word_info in line['Words']:
134
- word = {}
135
- origin_box = [
136
- word_info['Left'],
137
- word_info['Top'],
138
- word_info['Left'] + word_info['Width'],
139
- word_info['Top'] + word_info['Height']
140
- ]
141
-
142
- word['word_text'] = word_info['WordText']
143
- word['word_box'] = origin_box
144
- words.append(word)
145
-
146
- return words
147
- except (KeyError, IndexError, FileNotFoundError, json.JSONDecodeError) as e:
148
- print(f"Error cleaning Tesseract output: {str(e)}")
149
- return None
150
-
151
- def prepare_batch_for_inference(image_paths):
152
- # print("my_function was called")
153
- # traceback.print_stack() # This will print the stack trace
154
- print(f"Number of images to process: {len(image_paths)}") # Print the total number of images to be processed
155
- print("1. Preparing for Inference")
156
- tsv_output_paths = []
157
-
158
- inference_batch = dict()
159
- print("2. Starting Preprocessing")
160
- # Ensure that the image is only 1
161
- for image_path in image_paths:
162
- print(f"Processing the image: {image_path}") # Print the image being processed
163
- print("3. Preprocessing the Receipt")
164
- preprocessed_image = preprocess_image(image_path)
165
- if preprocessed_image is not None:
166
- print("4. Preprocessing done. Running OCR")
167
- json_output_path = run_tesseract_on_preprocessed_image(preprocessed_image, image_path)
168
- print("5. OCR Complete")
169
- if json_output_path:
170
- tsv_output_paths.append(json_output_path)
171
-
172
- print("6. Preprocessing and OCR Done")
173
- # clean_outputs is a list of lists
174
- clean_outputs = [clean_tesseract_output(tsv_path) for tsv_path in tsv_output_paths]
175
- print("7. Cleaned OCR output")
176
- word_lists = [[word['word_text'] for word in clean_output] for clean_output in clean_outputs]
177
- print("8. Word List Created")
178
- boxes_lists = [[word['word_box'] for word in clean_output] for clean_output in clean_outputs]
179
- print("9. Box List Created")
180
- inference_batch = {
181
- "image_path": image_paths,
182
- "bboxes": boxes_lists,
183
- "words": word_lists
184
- }
185
-
186
- print("10. Prepared for Inference Batch")
187
- return inference_batch
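preprocess_image() above shrinks oversized uploads before sending them to the OCR.space API. A standalone sketch of just that resize step, keeping the deleted defaults of a 1 MB limit and a 0.5 MB target; the arithmetic is the same square-root scaling:

import os
import cv2
import numpy as np

def resize_to_target_size(image_path, max_mb=1.0, target_mb=0.5):
    size_mb = os.path.getsize(image_path) / (1024 * 1024)
    img = cv2.imread(image_path)
    if size_mb <= max_mb:
        return img                                   # small enough, leave untouched
    # Scaling both sides by sqrt(target/current) shrinks the pixel count
    # (and, roughly, the encoded file size) by the target/current ratio.
    scale = np.sqrt(target_mb / size_mb)
    new_w, new_h = int(img.shape[1] * scale), int(img.shape[0] * scale)
    return cv2.resize(img, (new_w, new_h))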
 
static/inference/Layoutlmv3_inference/utils.py DELETED
@@ -1,50 +0,0 @@
1
- import numpy as np
2
- from transformers import AutoModelForTokenClassification, AutoProcessor
3
-
4
- def normalize_box(bbox, width, height):
5
- return [
6
- int(bbox[0]*(1000/width)),
7
- int(bbox[1]*(1000/height)),
8
- int(bbox[2]*(1000/width)),
9
- int(bbox[3]*(1000/height)),
10
- ]
11
-
12
- def compare_boxes(b1, b2):
13
- b1 = np.array([c for c in b1])
14
- b2 = np.array([c for c in b2])
15
- equal = np.array_equal(b1, b2)
16
- return equal
17
-
18
- def unnormalize_box(bbox, width, height):
19
- return [
20
- width * (bbox[0] / 1000),
21
- height * (bbox[1] / 1000),
22
- width * (bbox[2] / 1000),
23
- height * (bbox[3] / 1000),
24
- ]
25
-
26
- def adjacent(w1, w2):
27
- if w1['label'] == w2['label'] and abs(w1['id'] - w2['id']) == 1:
28
- return True
29
- return False
30
-
31
- def random_color():
32
- return np.random.randint(0, 255, 3)
33
-
34
- def image_label_2_color(annotation):
35
- if 'output' in annotation.keys():
36
- image_labels = set([span['label'] for span in annotation['output']])
37
- label2color = {f'{label}': (random_color()[0], random_color()[
38
- 1], random_color()[2]) for label in image_labels}
39
- return label2color
40
- else:
41
- raise ValueError('please use "output" as annotation key')
42
-
43
- def load_model(model_path):
44
- model = AutoModelForTokenClassification.from_pretrained(model_path)
45
- return model
46
-
47
- def load_processor():
48
- processor = AutoProcessor.from_pretrained(
49
- "microsoft/layoutlmv3-base", apply_ocr=False)
50
- return processor
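A quick worked example of the box scaling above: LayoutLMv3 expects coordinates on a 0-1000 grid, so normalize_box() rescales by the page size and unnormalize_box() reverses it (the numbers here are made up):

width, height = 1280, 960
box = [64, 48, 640, 480]                       # pixel coordinates from the OCR step

normalized = [int(box[0] * 1000 / width),  int(box[1] * 1000 / height),
              int(box[2] * 1000 / width),  int(box[3] * 1000 / height)]
assert normalized == [50, 50, 500, 500]        # what normalize_box() returns

restored = [width * normalized[0] / 1000, height * normalized[1] / 1000,
            width * normalized[2] / 1000, height * normalized[3] / 1000]
assert restored == [64.0, 48.0, 640.0, 480.0]  # what unnormalize_box() returns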
 
static/inference/preprocess.py DELETED
@@ -1,206 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import os
4
- import argparse
5
- from datasets.features import ClassLabel
6
- from transformers import AutoProcessor
7
- from sklearn.model_selection import train_test_split
8
- from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D, Dataset
9
- from datasets import Image as Img
10
- from PIL import Image
11
- from tqdm import tqdm_notebook # Import tqdm_notebook for displaying progress bars
12
-
13
-
14
- import warnings
15
- warnings.filterwarnings('ignore')
16
-
17
-
18
- def read_text_file(file_path):
19
- with open(file_path, 'r') as f:
20
- return (f.readlines())
21
-
22
-
23
- def prepare_examples(examples):
24
- images = examples[image_column_name]
25
- words = examples[text_column_name]
26
- boxes = examples[boxes_column_name]
27
- word_labels = examples[label_column_name]
28
-
29
- encoding = processor(images, words, boxes=boxes, word_labels=word_labels,
30
- truncation=True, padding="max_length")
31
-
32
- return encoding
33
-
34
-
35
- def get_zip_dir_name():
36
- try:
37
- os.chdir('/kaggle/input/ocr-combinedrec')
38
-
39
- dir_list1 = os.listdir()
40
- dir_list = sorted(dir_list1)
41
-
42
- any_file_name = dir_list[0]
43
- # Using os.path.splitext to get the file extension
44
- zip_dir_name, file_extension = os.path.splitext(any_file_name)
45
-
46
- # Extracting the directory name using os.path.dirname
47
- # zip_dir_name = os.path.dirname(any_file_name)
48
-
49
- # Test
50
-
51
- return 'dataset_files'
52
-
53
- # Check if all files start with the extracted directory name
54
- print(all(list(map(lambda x: x.startswith(zip_dir_name), dir_list))))
55
- if all(list(map(lambda x: x.startswith(zip_dir_name), dir_list))):
56
- return zip_dir_name
57
- return False
58
- finally:
59
- os.chdir('./../')
60
-
61
- def filter_out_unannotated(example):
62
- tags = example['ner_tags']
63
- return not all([tag == label2id['O'] for tag in tags])
64
-
65
-
66
-
67
- if __name__ == '__main__':
68
-
69
- parser = argparse.ArgumentParser()
70
- parser.add_argument('--valid_size')
71
- parser.add_argument('--output_path')
72
- args = parser.parse_args()
73
- TEST_SIZE = float(args.valid_size)
74
- OUTPUT_PATH = args.output_path
75
-
76
- os.makedirs(args.output_path, exist_ok=True)
77
- files = {}
78
- zip_dir_name = get_zip_dir_name()
79
-
80
- if zip_dir_name:
81
- files['train_box'] = read_text_file('/kaggle/input/ocr-combinedrec/dataset_files_box.txt')
82
-
83
- files['train_image'] = read_text_file(os.path.join(
84
- os.curdir, 'ocr-combinedrec', f'{zip_dir_name}_image.txt'))
85
- files['train'] = read_text_file(os.path.join(
86
- os.curdir, 'ocr-combinedrec', f'{zip_dir_name}.txt'))
87
- else:
88
- for f in os.listdir():
89
- if f.endswith('.txt') and f.find('box') != -1:
90
- files['train_box'] = read_text_file(os.path.join(os.curdir, f))
91
- elif f.endswith('.txt') and f.find('image') != -1:
92
- files['train_image'] = read_text_file(
93
- os.path.join(os.curdir, f))
94
- elif f.endswith('.txt') and f.find('labels') == -1:
95
- files['train'] = read_text_file(os.path.join(os.curdir, f))
96
-
97
- assert(len(files['train']) == len(files['train_box']))
98
- assert(len(files['train_box']) == len(files['train_image']))
99
- assert(len(files['train_image']) == len(files['train']))
100
-
101
- images = {}
102
- for i, row in enumerate(files['train_image']):
103
- if row != '\n':
104
- image_name = row.split('\t')[-1]
105
- images.setdefault(image_name.replace('\n', ''), []).append(i)
106
-
107
- words, bboxes, ner_tags, image_path = [], [], [], []
108
- for image, rows in images.items():
109
- words.append([row.split('\t')[0].replace('\n', '')
110
- for row in files['train'][rows[0]:rows[-1]+1]])
111
- ner_tags.append([row.split('\t')[1].replace('\n', '')
112
- for row in files['train'][rows[0]:rows[-1]+1]])
113
- bboxes.append([box.split('\t')[1].replace('\n', '')
114
- for box in files['train_box'][rows[0]:rows[-1]+1]])
115
- if zip_dir_name:
116
- image_path.append(f"/kaggle/input/ocr-combinedrec/{zip_dir_name}/{image}")
117
- else:
118
- image_path.append(f"/kaggle/input/ocr-combinedrec/{image}")
119
-
120
- labels = list(set([tag for doc_tag in ner_tags for tag in doc_tag]))
121
- id2label = {v: k for v, k in enumerate(labels)}
122
- label2id = {k: v for v, k in enumerate(labels)}
123
-
124
- dataset_dict = {
125
- 'id': range(len(words)),
126
- 'tokens': words,
127
- 'bboxes': [[list(map(int, bbox.split())) for bbox in doc] for doc in bboxes],
128
- 'ner_tags': [[label2id[tag] for tag in ner_tag] for ner_tag in ner_tags],
129
- 'image': [Image.open(path).convert("RGB") for path in image_path]
130
- }
131
-
132
- #raw features
133
- features = Features({
134
- 'id': Value(dtype='string', id=None),
135
- 'tokens': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
136
- 'bboxes': Sequence(feature=Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None), length=-1, id=None),
137
- 'ner_tags': Sequence(feature=ClassLabel(num_classes=len(labels), names=labels, names_file=None, id=None), length=-1, id=None),
138
- 'image': Img(decode=True, id=None)
139
- })
140
-
141
- full_data_set = Dataset.from_dict(dataset_dict, features=features)
142
- dataset = full_data_set.train_test_split(test_size=TEST_SIZE)
143
- dataset["train"] = dataset["train"].filter(filter_out_unannotated)
144
- processor = AutoProcessor.from_pretrained(
145
- "microsoft/layoutlmv3-base", apply_ocr=False)
146
-
147
- features = dataset["train"].features
148
- column_names = dataset["train"].column_names
149
- image_column_name = "image"
150
- text_column_name = "tokens"
151
- boxes_column_name = "bboxes"
152
- label_column_name = "ner_tags"
153
-
154
- # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the
155
- # unique labels.
156
-
157
-
158
- # def get_label_list(labels):
159
- # unique_labels = set()
160
- # for label in labels:
161
- # unique_labels = unique_labels | set(label)
162
- # label_list = list(unique_labels)
163
- # label_list.sort()
164
- # return label_list
165
-
166
-
167
- # if isinstance(features[label_column_name].feature, ClassLabel):
168
- # label_list = features[label_column_name].feature.names
169
- # # No need to convert the labels since they are already ints.
170
- # id2label = {k: v for k, v in enumerate(label_list)}
171
- # label2id = {v: k for k, v in enumerate(label_list)}
172
- # else:
173
- # label_list = get_label_list(dataset["train"][label_column_name])
174
- # id2label = {k: v for k, v in enumerate(label_list)}
175
- # label2id = {v: k for k, v in enumerate(label_list)}
176
- # num_labels = len(label_list)
177
-
178
-
179
-
180
- # we need to define custom features for `set_format` (used later on) to work properly
181
- features = Features({
182
- 'pixel_values': Array3D(dtype="float32", shape=(3, 224, 224)),
183
- 'input_ids': Sequence(feature=Value(dtype='int64')),
184
- 'attention_mask': Sequence(Value(dtype='int64')),
185
- 'bbox': Array2D(dtype="int64", shape=(512, 4)),
186
- 'labels': Sequence(ClassLabel(names=labels)),
187
- })
188
-
189
- train_dataset = dataset["train"].map(
190
- prepare_examples,
191
- batched=True,
192
- remove_columns=column_names,
193
- features=features,
194
- )
195
- eval_dataset = dataset["test"].map(
196
- prepare_examples,
197
- batched=True,
198
- remove_columns=column_names,
199
- features=features,
200
- )
201
- train_dataset.set_format("torch")
202
- if not OUTPUT_PATH.endswith('/'):
203
- OUTPUT_PATH += '/'
204
- train_dataset.save_to_disk(f'{OUTPUT_PATH}train_split')
205
- eval_dataset.save_to_disk(f'{OUTPUT_PATH}eval_split')
206
- dataset.save_to_disk(f'{OUTPUT_PATH}raw_data')
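The tail of the script above is a standard datasets split-and-save pattern. A toy sketch of just that pattern, assuming the datasets library is installed; the column contents and output paths are placeholders:

from datasets import Dataset

toy = Dataset.from_dict({
    'id': ['0', '1', '2', '3'],
    'tokens': [['TOTAL', '9.99'], ['DATE', '01/01'], ['CASH', '10.00'], ['CHANGE', '0.01']],
})
splits = toy.train_test_split(test_size=0.25)       # same role as TEST_SIZE above
splits['train'].save_to_disk('preprocessed/train_split')
splits['test'].save_to_disk('preprocessed/eval_split')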
 
static/inference/run_inference.py DELETED
@@ -1,27 +0,0 @@
1
- import argparse
2
- from asyncio.log import logger
3
- from Layoutlmv3_inference.ocr import prepare_batch_for_inference
4
- from Layoutlmv3_inference.inference_handler import handle
5
- import logging
6
- import os
7
-
8
- if __name__ == "__main__":
9
- try:
10
- parser = argparse.ArgumentParser()
11
- parser.add_argument("--model_path", type=str)
12
- parser.add_argument("--images_path", type=str)
13
- args, _ = parser.parse_known_args()
14
- images_path = args.images_path
15
- image_files = os.listdir(images_path)
16
- images_path = [images_path + '/' + image_files[0]]
17
- inference_batch = prepare_batch_for_inference(images_path)
18
- context = {"model_dir": args.model_path}
19
- handle(inference_batch,context)
20
- except Exception as err:
21
- os.makedirs('log', exist_ok=True)
22
- logging.basicConfig(filename='log/error_output.log', level=logging.ERROR,
23
- format='%(asctime)s %(levelname)s %(name)s %(message)s')
24
- logger = logging.getLogger(__name__)
25
- logger.error(err)
26
-
27
-
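For reference, the programmatic equivalent of what this removed wrapper did, assuming the surviving Layoutlmv3_inference package exposes the same entry points; the model directory and upload folder are placeholders:

import os
from Layoutlmv3_inference.ocr import prepare_batch_for_inference
from Layoutlmv3_inference.inference_handler import handle

images_dir = 'static/temp/uploads'
first_image = os.path.join(images_dir, sorted(os.listdir(images_dir))[0])   # the script only processed one file

inference_batch = prepare_batch_for_inference([first_image])   # preprocessing + OCR, returns words/boxes per image
handle(inference_batch, {"model_dir": "model/export"})         # LayoutLMv3 inference, JSON dump, annotated JPG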