isLinXu committed
Commit 9223de5 · 1 Parent(s): 6c89ef7
Files changed (1)
  1. app.py +55 -190
app.py CHANGED
@@ -1,27 +1,29 @@

  import os
- os.system("pip install gradio==3.42.0")
  os.system("pip install 'mmengine>=0.6.0'")
  os.system("pip install 'mmcv>=2.0.0rc4,<2.1.0'")
  os.system("pip install 'mmdet>=3.0.0,<4.0.0'")
- os.system("pip install mmocr")
-
- import json
- import os
- from argparse import ArgumentParser

  import PIL
  import cv2
- import gradio as gr
  import numpy as np
  import torch
- from PIL.Image import Image
- from mmocr.apis.inferencers import MMOCRInferencer

  import warnings

  warnings.filterwarnings("ignore")


  def save_image(img, img_path):
      # Convert PIL image to OpenCV image
      img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
@@ -29,189 +31,52 @@ def save_image(img, img_path):
      cv2.imwrite(img_path, img)


- textdet_model_list = ['DBNet', 'DRRG', 'FCENet', 'PANet', 'PSENet', 'TextSnake', 'MaskRCNN']
- textrec_model_list = ['ABINet', 'ASTER', 'CRNN', 'MASTER', 'NRTR', 'RobustScanner', 'SARNet', 'SATRN', 'SVTR']
- textkie_model_list = ['SDMGR', 'SDMGR']
-
-
- def ocr_inference(inputs, out_dir, det, det_weights, rec, rec_weights, device):
-     init_args, call_args = parse_args()
-     inputs = np.array(inputs)
-     img_path = "demo_text_ocr.jpg"
-     save_image(inputs, img_path)
-     if det is not None and rec is not None:
-         init_args['det'] = det
-         init_args['det_weights'] = None
-         init_args['rec'] = rec
-         init_args['rec_weights'] = None
-     elif det_weights is not None and rec_weights is not None:
-         init_args['det'] = None
-         init_args['det_weights'] = det_weights
-         init_args['rec'] = None
-         init_args['rec_weights'] = rec_weights
-
-     call_args['inputs'] = img_path
-     call_args['out_dir'] = out_dir
-     call_args['batch_size'] = 1
-     call_args['show'] = False
-     call_args['save_pred'] = True
-     call_args['save_vis'] = True
-     init_args['device'] = device
-     print("init_args", init_args)
-     print("call_args", call_args)
-     ocr = MMOCRInferencer(**init_args)
-     ocr(**call_args)
-     save_vis_dir = './results/vis/'
-     save_pred_dir = './results/preds/'
-     img_out = PIL.Image.open(os.path.join(save_vis_dir, img_path))
-     json_out = json.load(open(os.path.join(save_pred_dir, img_path.replace('.jpg', '.json'))))
-     return img_out, json_out
-
-
  def download_test_image():
      # Images
      torch.hub.download_url_to_file(
-         'https://user-images.githubusercontent.com/59380685/266821429-9a897c0a-5b02-4260-a65b-3514b758f6b6.jpg',
-         'demo_densetext_det.jpg')
-     torch.hub.download_url_to_file(
-         'https://user-images.githubusercontent.com/59380685/266821432-17bb0646-a3e9-451e-9b4d-6e41ce4c3f0c.jpg',
-         'demo_text_recog.jpg')
-     torch.hub.download_url_to_file(
-         'https://user-images.githubusercontent.com/59380685/266821434-fe0d4d18-f3e2-4acf-baf5-0d2e318f0b09.jpg',
-         'demo_text_ocr.jpg')
      torch.hub.download_url_to_file(
-         'https://user-images.githubusercontent.com/59380685/266821435-5d7af2b4-cb84-4355-91cb-37d90e91aa30.jpg',
-         'demo_text_det.jpg')
      torch.hub.download_url_to_file(
-         'https://user-images.githubusercontent.com/59380685/266821436-4790c6c1-2da5-45c7-b837-04eeea0d7264.jpeg',
-         'demo_kie.jpg')
-
-
- def parse_args():
-     parser = ArgumentParser()
-     parser.add_argument(
-         '--inputs', type=str, help='Input image file or folder path.')
-     parser.add_argument(
-         '--out-dir',
-         type=str,
-         default='./results/',
-         help='Output directory of results.')
-     parser.add_argument(
-         '--det',
-         type=str,
-         default=None,
-         help='Pretrained text detection algorithm. It\'s the path to the '
-         'config file or the model name defined in metafile.')
-     parser.add_argument(
-         '--det-weights',
-         type=str,
-         default=None,
-         help='Path to the custom checkpoint file of the selected det model. '
-         'If it is not specified and "det" is a model name of metafile, the '
-         'weights will be loaded from metafile.')
-     parser.add_argument(
-         '--rec',
-         type=str,
-         default=None,
-         help='Pretrained text recognition algorithm. It\'s the path to the '
-         'config file or the model name defined in metafile.')
-     parser.add_argument(
-         '--rec-weights',
-         type=str,
-         default=None,
-         help='Path to the custom checkpoint file of the selected recog model. '
-         'If it is not specified and "rec" is a model name of metafile, the '
-         'weights will be loaded from metafile.')
-     parser.add_argument(
-         '--kie',
-         type=str,
-         default=None,
-         help='Pretrained key information extraction algorithm. It\'s the path '
-         'to the config file or the model name defined in metafile.')
-     parser.add_argument(
-         '--kie-weights',
-         type=str,
-         default=None,
-         help='Path to the custom checkpoint file of the selected kie model. '
-         'If it is not specified and "kie" is a model name of metafile, the '
-         'weights will be loaded from metafile.')
-     parser.add_argument(
-         '--device',
-         type=str,
-         default=None,
-         help='Device used for inference. '
-         'If not specified, the available device will be automatically used.')
-     parser.add_argument(
-         '--batch-size', type=int, default=1, help='Inference batch size.')
-     parser.add_argument(
-         '--show',
-         action='store_true',
-         help='Display the image in a popup window.')
-     parser.add_argument(
-         '--print-result',
-         action='store_true',
-         help='Whether to print the results.')
-     parser.add_argument(
-         '--save_pred',
-         action='store_true',
-         help='Save the inference results to out_dir.')
-     parser.add_argument(
-         '--save_vis',
-         action='store_true',
-         help='Save the visualization results to out_dir.')
-
-     call_args = vars(parser.parse_args())
-
-     init_kws = [
-         'det', 'det_weights', 'rec', 'rec_weights', 'kie', 'kie_weights', 'device'
-     ]
-     init_args = {}
-     for init_kw in init_kws:
-         init_args[init_kw] = call_args.pop(init_kw)
-
-     return init_args, call_args
-
-
- if __name__ == '__main__':
-     # Define Gradio input and output types
-     input_image = gr.inputs.Image(type="pil", label="Input Image")
-     out_dir = gr.inputs.Textbox(default="results")
-     det = gr.inputs.Dropdown(label="Text Detection Model", choices=[m for m in textdet_model_list], default='DBNet')
-     det_weights = gr.inputs.Textbox(default=None)
-     rec = gr.inputs.Dropdown(label="Text Recognition Model", choices=[m for m in textrec_model_list], default='CRNN')
-     rec_weights = gr.inputs.Textbox(default=None)
-     kie = gr.inputs.Textbox(default='SDMGR')
-     # kie = gr.inputs.Dropdown(label="Key Information Extraction Model", choices=[m for m in textkie_model_list],
-     #                          default='SDMGR')
-     # kie_weights = gr.inputs.Textbox(default=None)
-     device = gr.inputs.Radio(choices=["cpu", "cuda"], label="Device used for inference", default="cpu")
-     batch_size = gr.inputs.Number(default=1, label="Inference batch size")
-     output_image = gr.outputs.Image(type="pil", label="Output Image")
-     output_json = gr.outputs.Textbox()
-     download_test_image()
-     examples = [["demo_text_ocr.jpg", "results", "DBNet", None, "CRNN", None, "cpu"],
-                 ["demo_text_det.jpg", "results", "FCENet", None, "ASTER", None, "cpu"],
-                 ["demo_text_recog.jpg", "results", "DBNet", None, "MASTER", None, "cpu"],
-                 ["demo_densetext_det.jpg", "results", "PSENet", None, "CRNN", None, "cpu"],
-                 ["demo_kie.jpg", "results", "TextSnake", None, "RobustScanner", None, "cpu"]
-                 ]
-
-     title = "MMOCR web demo"
-     description = "<div align='center'><img src='https://raw.githubusercontent.com/open-mmlab/mmocr/main/resources/mmocr-logo.png' width='450'/><div>" \
-                   "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmocr'>MMOCR</a> is an open-source toolbox based on PyTorch and mmdetection, focusing on text detection, text recognition, and downstream tasks such as key information extraction. It is part of the OpenMMLab project. " \
-                   "OpenMMLab Text Detection, Recognition and Understanding Toolbox.</p>"
-     article = "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmocr'>MMOCR</a></p>" \
-               "<p style='text-align: center'><a href='https://github.com/isLinXu'>Gradio demo built by gatilin</a></p>"
-
-     # Create Gradio interface
-     iface = gr.Interface(
-         fn=ocr_inference,
-         inputs=[
-             input_image, out_dir, det, det_weights, rec, rec_weights, device, batch_size
-         ],
-         outputs=[output_image, output_json], examples=examples,
-         title=title, description=description, article=article,
-     )
-
-     # Launch Gradio interface
-     iface.launch()
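
Aside: the removed ocr_inference()/parse_args() pair is a thin wrapper around MMOCR's MMOCRInferencer; the init/call arguments printed above map one-to-one onto it. A minimal sketch of the equivalent direct call, assuming mmocr 1.x is installed and one of the demo images downloaded by download_test_image() is present:

    from mmocr.apis.inferencers import MMOCRInferencer

    # Same model names as in the dropdown lists above.
    ocr = MMOCRInferencer(det='DBNet', rec='CRNN', device='cpu')
    # Writes visualizations under ./results/vis/ and JSON predictions under ./results/preds/.
    ocr('demo_text_ocr.jpg', out_dir='./results/', batch_size=1,
        show=False, save_vis=True, save_pred=True)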
 
  import os
+ os.system("pip install xtcocotools>=1.12")
  os.system("pip install 'mmengine>=0.6.0'")
  os.system("pip install 'mmcv>=2.0.0rc4,<2.1.0'")
  os.system("pip install 'mmdet>=3.0.0,<4.0.0'")
+ os.system("pip install 'mmpose'")

  import PIL
  import cv2
+ import mmpose
  import numpy as np
+
  import torch
+ from mmpose.apis import MMPoseInferencer
+ import gradio as gr

  import warnings

  warnings.filterwarnings("ignore")

+ mmpose_model_list = ["human", "hand", "face", "animal", "wholebody",
+                      "vitpose", "vitpose-s", "vitpose-b", "vitpose-l", "vitpose-h"]
+
+
  def save_image(img, img_path):
      # Convert PIL image to OpenCV image
      img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
      cv2.imwrite(img_path, img)


  def download_test_image():
      # Images
      torch.hub.download_url_to_file(
+         'https://user-images.githubusercontent.com/59380685/266264420-21575a83-4057-41cf-8a4a-b3ea6f332d79.jpg',
+         'bus.jpg')
      torch.hub.download_url_to_file(
+         'https://user-images.githubusercontent.com/59380685/266264536-82afdf58-6b9a-4568-b9df-551ee72cb6d9.jpg',
+         'dogs.jpg')
      torch.hub.download_url_to_file(
+         'https://user-images.githubusercontent.com/59380685/266264600-9d0c26ca-8ba6-45f2-b53b-4dc98460c43e.jpg',
+         'zidane.jpg')
+
+
+ def predict_pose(img, model_name, out_dir):
+     img_path = "input_img.jpg"
+     save_image(img, img_path)
+     # Use an explicit device string; torch.cuda.current_device() returns an int index.
+     device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+     inferencer = MMPoseInferencer(model_name, device=device)
+     # The inferencer yields one result per input; out_dir receives the rendered image.
+     result_generator = inferencer(img_path, show=False, out_dir=out_dir)
+     result = next(result_generator)
+     save_dir = './output/visualizations/'
+     if os.path.exists(save_dir):
+         out_img_path = save_dir + img_path
+         print("out_img_path: ", out_img_path)
+     else:
+         out_img_path = img_path
+     out_img = PIL.Image.open(out_img_path)
+     return out_img
+
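
Aside: stripped of the Gradio plumbing, predict_pose() reduces to the standard MMPoseInferencer pattern. A minimal sketch, assuming mmpose 1.x is installed and zidane.jpg has been fetched by download_test_image():

    from mmpose.apis import MMPoseInferencer

    inferencer = MMPoseInferencer('human', device='cpu')
    # Calling the inferencer returns a generator; with out_dir set it also writes
    # a rendered image under ./output/visualizations/.
    result = next(inferencer('zidane.jpg', show=False, out_dir='./output'))
    # result['predictions'] holds per-image lists of instance dicts with
    # 'keypoints' and 'keypoint_scores' entries.
    print(len(result['predictions'][0]), 'instances detected')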
+ download_test_image()
+ input_image = gr.inputs.Image(type='pil', label="Original Image")
+ model_name = gr.inputs.Dropdown(choices=[m for m in mmpose_model_list], label='Model')
+ out_dir = gr.inputs.Textbox(label="Output Directory", default="./output")
+ output_image = gr.outputs.Image(type="pil", label="Output Image")
+
+ # Each example row must supply a value for every input component (image, model, out_dir).
+ examples = [
+     ['zidane.jpg', 'human', './output'],
+     ['dogs.jpg', 'animal', './output'],
+ ]
+ title = "MMPose web demo"
+ description = "<div align='center'><img src='https://raw.githubusercontent.com/open-mmlab/mmpose/main/resources/mmpose-logo.png' width='450'/><div>" \
+               "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmpose'>MMPose</a> is an open-source pose-analysis toolbox based on PyTorch and is part of the OpenMMLab project. " \
+               "OpenMMLab Pose Estimation Toolbox and Benchmark.</p>"
+ article = "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmpose'>MMPose</a></p>" \
+           "<p style='text-align: center'><a href='https://github.com/isLinXu'>Gradio demo built by gatilin</a></p>"
+
+ iface = gr.Interface(fn=predict_pose, inputs=[input_image, model_name, out_dir], outputs=output_image,
+                      examples=examples, title=title, description=description, article=article)
+ iface.launch()
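
One caveat worth flagging: the commit removes the old gradio==3.42.0 pin, yet the new code still uses the Gradio 3.x gr.inputs/gr.outputs namespaces, which were dropped in Gradio 4. If the Space ever resolves a newer Gradio, the components need the 4.x spellings; a hedged sketch (untested against this Space, reusing mmpose_model_list from app.py):

    import gradio as gr

    # Gradio 4.x equivalents of the deprecated gr.inputs/gr.outputs components.
    input_image = gr.Image(type='pil', label="Original Image")
    model_name = gr.Dropdown(choices=mmpose_model_list, label='Model')
    out_dir = gr.Textbox(label="Output Directory", value="./output")  # 'default' became 'value'
    output_image = gr.Image(type="pil", label="Output Image")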