Spaces:

gatilin
/

mmocr-webui

Runtime error

App Files Files Community

isLinXu committed on Sep 10, 2023

Commit

c553e79

1 Parent(s): 9223de5

update

Browse files

Files changed (1) hide show

app.py +185 -55

app.py CHANGED Viewed

@@ -1,28 +1,27 @@
 import os
-os.system("pip install xtcocotools>=1.12")
 os.system("pip install 'mmengine>=0.6.0'")
 os.system("pip install 'mmcv>=2.0.0rc4,<2.1.0'")
 os.system("pip install 'mmdet>=3.0.0,<4.0.0'")
-os.system("pip install 'mmpose'")
 import PIL
 import cv2
-import mmpose
 import numpy as np
 import torch
-from mmpose.apis import MMPoseInferencer
-import gradio as gr
 import warnings
 warnings.filterwarnings("ignore")
-mmpose_model_list = ["human", "hand", "face", "animal", "wholebody",
-                     "vitpose", "vitpose-s", "vitpose-b", "vitpose-l", "vitpose-h"]
 def save_image(img, img_path):
     # Convert PIL image to OpenCV image
@@ -31,52 +30,183 @@ def save_image(img, img_path):
     cv2.imwrite(img_path, img)
 def download_test_image():
     # Images
     torch.hub.download_url_to_file(
-        'https://user-images.githubusercontent.com/59380685/266264420-21575a83-4057-41cf-8a4a-b3ea6f332d79.jpg',
-        'bus.jpg')
     torch.hub.download_url_to_file(
-        'https://user-images.githubusercontent.com/59380685/266264536-82afdf58-6b9a-4568-b9df-551ee72cb6d9.jpg',
-        'dogs.jpg')
     torch.hub.download_url_to_file(
-        'https://user-images.githubusercontent.com/59380685/266264600-9d0c26ca-8ba6-45f2-b53b-4dc98460c43e.jpg',
-        'zidane.jpg')
-def predict_pose(img, model_name, out_dir):
-    img_path = "input_img.jpg"
-    save_image(img, img_path)
-    device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'
-    inferencer = MMPoseInferencer(model_name, device=device)
-    result_generator = inferencer(img_path, show=False, out_dir=out_dir)
-    result = next(result_generator)
-    save_dir = './output/visualizations/'
-    if os.path.exists(save_dir):
-        out_img_path = save_dir + img_path
-        print("out_img_path: ", out_img_path)
-    else:
-        out_img_path = img_path
-    out_img = PIL.Image.open(out_img_path)
-    return out_img
-download_test_image()
-input_image = gr.inputs.Image(type='pil', label="Original Image")
-model_name = gr.inputs.Dropdown(choices=[m for m in mmpose_model_list], label='Model')
-out_dir = gr.inputs.Textbox(label="Output Directory", default="./output")
-output_image = gr.outputs.Image(type="pil", label="Output Image")
-examples = [
-    ['zidane.jpg', 'human'],
-    ['dogs.jpg', 'animal'],
-]
-title = "MMPose detection web demo"
-description = "<div align='center'><img src='https://raw.githubusercontent.com/open-mmlab/mmpose/main/resources/mmpose-logo.png' width='450''/><div>" \
-              "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmpose'>MMPose</a> MMPose 是一款基于 PyTorch 的姿态分析的开源工具箱，是 OpenMMLab 项目的成员之一。" \
-              "OpenMMLab Pose Estimation Toolbox and Benchmark..</p>"
-article = "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmpose'>MMPose</a></p>" \
-          "<p style='text-align: center'><a href='https://github.com/isLinXu'>gradio build by gatilin</a></a></p>"
-iface = gr.Interface(fn=predict_pose, inputs=[input_image, model_name, out_dir], outputs=output_image,
-                     examples=examples, title=title, description=description, article=article)
-iface.launch()

 import os
+os.system("pip install gradio==3.42.0")
 os.system("pip install 'mmengine>=0.6.0'")
 os.system("pip install 'mmcv>=2.0.0rc4,<2.1.0'")
 os.system("pip install 'mmdet>=3.0.0,<4.0.0'")
+os.system("pip install mmocr")
+import json
+import os
+from argparse import ArgumentParser
 import PIL
 import cv2
+import gradio as gr
 import numpy as np
 import torch
+from PIL.Image import Image
+from mmocr.apis.inferencers import MMOCRInferencer
 import warnings
 warnings.filterwarnings("ignore")
 def save_image(img, img_path):
     # Convert PIL image to OpenCV image
     cv2.imwrite(img_path, img)
+# Text detection model names.
+textdet_model_list = ['DBNet', 'DRRG', 'FCENet', 'PANet', 'PSENet', 'TextSnake', 'MaskRCNN']
+# Text recognition model names.
+textrec_model_list = ['ABINet', 'ASTER', 'CRNN', 'MASTER', 'NRTR', 'RobustScanner', 'SARNet', 'SATRN', 'SVTR']
+# Key information extraction (KIE) model names.
+textkie_model_list = ['SDMGR']
+def ocr_inference(inputs, out_dir, det, det_weights, rec, rec_weights, kie, kie_weights, device, batch_size):
+    init_args, call_args = parse_args()
+    inputs = np.array(inputs)
+    img_path = "demo_text_ocr.jpg"
+    save_image(inputs, img_path)
+    if det is not None and rec is not None:
+        init_args['det'] = det
+        init_args['det_weights'] = None
+        init_args['rec'] = rec
+        init_args['rec_weights'] = None
+    elif det_weights is not None and rec_weights is not None:
+        init_args['det'] = None
+        init_args['det_weights'] = det_weights
+        init_args['rec'] = None
+        init_args['rec_weights'] = rec_weights
+    call_args['inputs'] = img_path
+    call_args['out_dir'] = out_dir
+    call_args['batch_size'] = int(batch_size)
+    call_args['show'] = False
+    call_args['save_pred'] = True
+    call_args['save_vis'] = True
+    init_args['device'] = device
+    print("init_args", init_args)
+    print("call_args", call_args)
+    ocr = MMOCRInferencer(**init_args)
+    ocr(**call_args)
+    save_vis_dir = '../../results/vis/'
+    save_pred_dir = '../../results/preds/'
+    img_out = PIL.Image.open(os.path.join(save_vis_dir, img_path))
+    json_out = json.load(open(os.path.join(save_pred_dir, img_path.replace('.jpg', '.json'))))
+    return img_out, json_out
 def download_test_image():
     # Images
     torch.hub.download_url_to_file(
+        'https://user-images.githubusercontent.com/59380685/266821429-9a897c0a-5b02-4260-a65b-3514b758f6b6.jpg',
+        'demo_densetext_det.jpg')
+    torch.hub.download_url_to_file(
+        'https://user-images.githubusercontent.com/59380685/266821432-17bb0646-a3e9-451e-9b4d-6e41ce4c3f0c.jpg',
+        'demo_text_recog.jpg')
+    torch.hub.download_url_to_file(
+        'https://user-images.githubusercontent.com/59380685/266821434-fe0d4d18-f3e2-4acf-baf5-0d2e318f0b09.jpg',
+        'demo_text_ocr.jpg')
     torch.hub.download_url_to_file(
+        'https://user-images.githubusercontent.com/59380685/266821435-5d7af2b4-cb84-4355-91cb-37d90e91aa30.jpg',
+        'demo_text_det.jpg')
     torch.hub.download_url_to_file(
+        'https://user-images.githubusercontent.com/59380685/266821436-4790c6c1-2da5-45c7-b837-04eeea0d7264.jpeg',
+        'demo_kie.jpg')
+def parse_args():
+    parser = ArgumentParser()
+    parser.add_argument(
+        '--inputs', type=str, help='Input image file or folder path.')
+    parser.add_argument(
+        '--out-dir',
+        type=str,
+        default='./results/',
+        help='Output directory of results.')
+    parser.add_argument(
+        '--det',
+        type=str,
+        default=None,
+        help='Pretrained text detection algorithm. It\'s the path to the '
+             'config file or the model name defined in metafile.')
+    parser.add_argument(
+        '--det-weights',
+        type=str,
+        default=None,
+        help='Path to the custom checkpoint file of the selected det model. '
+             'If it is not specified and "det" is a model name of metafile, the '
+             'weights will be loaded from metafile.')
+    parser.add_argument(
+        '--rec',
+        type=str,
+        default=None,
+        help='Pretrained text recognition algorithm. It\'s the path to the '
+             'config file or the model name defined in metafile.')
+    parser.add_argument(
+        '--rec-weights',
+        type=str,
+        default=None,
+        help='Path to the custom checkpoint file of the selected recog model. '
+             'If it is not specified and "rec" is a model name of metafile, the '
+             'weights will be loaded from metafile.')
+    parser.add_argument(
+        '--kie',
+        type=str,
+        default=None,
+        help='Pretrained key information extraction algorithm. It\'s the path'
+             'to the config file or the model name defined in metafile.')
+    parser.add_argument(
+        '--kie-weights',
+        type=str,
+        default=None,
+        help='Path to the custom checkpoint file of the selected kie model. '
+             'If it is not specified and "kie" is a model name of metafile, the '
+             'weights will be loaded from metafile.')
+    parser.add_argument(
+        '--device',
+        type=str,
+        default=None,
+        help='Device used for inference. '
+             'If not specified, the available device will be automatically used.')
+    parser.add_argument(
+        '--batch-size', type=int, default=1, help='Inference batch size.')
+    parser.add_argument(
+        '--show',
+        action='store_true',
+        help='Display the image in a popup window.')
+    parser.add_argument(
+        '--print-result',
+        action='store_true',
+        help='Whether to print the results.')
+    parser.add_argument(
+        '--save_pred',
+        action='store_true',
+        help='Save the inference results to out_dir.')
+    parser.add_argument(
+        '--save_vis',
+        action='store_true',
+        help='Save the visualization results to out_dir.')
+    call_args = vars(parser.parse_args())
+    init_kws = [
+        'det', 'det_weights', 'rec', 'rec_weights', 'kie', 'kie_weights', 'device'
+    ]
+    init_args = {}
+    for init_kw in init_kws:
+        init_args[init_kw] = call_args.pop(init_kw)
+    return init_args, call_args
+if __name__ == '__main__':
+    # Define Gradio input and output types
+    input_image = gr.inputs.Image(type="pil", label="Input Image")
+    out_dir = gr.inputs.Textbox(default="results")
+    det = gr.inputs.Dropdown(label="Text Detection Model", choices=[m for m in textdet_model_list], default='DBNet')
+    det_weights = gr.inputs.Textbox(default=None)
+    rec = gr.inputs.Dropdown(label="Text Recognition Model", choices=[m for m in textrec_model_list], default='CRNN')
+    rec_weights = gr.inputs.Textbox(default=None)
+    device = gr.inputs.Radio(choices=["cpu", "cuda"], label="Device used for inference", default="cpu")
+    batch_size = gr.inputs.Number(default=1, label="Inference batch size")
+    output_image = gr.outputs.Image(type="pil", label="Output Image")
+    output_json = gr.outputs.Textbox()
+    download_test_image()
+    examples = [["demo_text_ocr.jpg", "results", "DBNet", None, "CRNN", "cpu", 1],
+                ["demo_text_det.jpg", "results", "FCENet", None, "ASTER", "cpu", 1],
+                ["demo_text_recog.jpg", "results", "PANet", None, "MASTER", "cpu", 1],
+                ["demo_densetext_det.jpg", "results", "PSENet", None, "CRNN", None, "cpu", 1],
+                ["demo_kie.jpg", "results", "TextSnake", None, "RobustScanner", None, "cpu", 1]
+                ]
+    title = "MMOCR web demo"
+    description = "<div align='center'><img src='https://raw.githubusercontent.com/open-mmlab/mmocr/main/resources/mmocr-logo.png' width='450''/><div>" \
+                  "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmocr'>MMOCR</a> MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱，专注于文本检测，文本识别以及相应的下游任务，如关键信息提取。 它是 OpenMMLab 项目的一部分。" \
+                  "OpenMMLab Text Detection, Recognition and Understanding Toolbox.</p>"
+    article = "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmocr'>MMOCR</a></p>" \
+              "<p style='text-align: center'><a href='https://github.com/isLinXu'>gradio build by gatilin</a></a></p>"
+    # Create Gradio interface
+    iface = gr.Interface(
+        fn=ocr_inference,
+        inputs=[
+            input_image, out_dir, det, det_weights, rec, rec_weights, device, batch_size
+        ],
+        outputs=[output_image, output_json], examples=examples,
+        title=title, description=description, article=article,
+    )
+    # Launch Gradio interface
+    iface.launch()