imabackstabber committed on
Commit
ed25de7
·
1 Parent(s): 0f987d3

test mmdet pipeline

Browse files
Files changed (3) hide show
  1. app.py +7 -5
  2. assets/04.jpg +0 -0
  3. main/inference.py +1 -1
app.py CHANGED
@@ -32,7 +32,7 @@ def infer(image_input, in_threshold=0.5, num_people="Single person", render_mesh
32
  inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)
33
  os.system(f'rm -rf {OUT_FOLDER}/*')
34
  multi_person = False if (num_people == "Single person") else True
35
- vis_img, bbox = inferer.infer(image_input, in_threshold, 0, multi_person, not(render_mesh))
36
 
37
  # cap = cv2.VideoCapture(video_input)
38
  # fps = math.ceil(cap.get(5))
@@ -67,7 +67,7 @@ def infer(image_input, in_threshold=0.5, num_people="Single person", render_mesh
67
  # os.system(f'zip -r {save_mesh_file} {save_path_mesh}')
68
  # os.system(f'zip -r {save_smplx_file} {save_path_smplx}')
69
  # yield img, video_path, save_mesh_file, save_smplx_file
70
- return vis_img, "bbox meta: {}".format(bbox)
71
 
72
  TITLE = '''<h1 align="center">PostoMETRO: Pose Token Enhanced Mesh Transformer for Robust 3D Human Mesh Recovery</h1>'''
73
  DESCRIPTION = '''
@@ -86,7 +86,8 @@ with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
86
  with gr.Row():
87
  with gr.Column():
88
  image_input = gr.Image(label="Input image", elem_classes="Image")
89
- threshold = gr.Slider(0, 1.0, value=0.5, label='BBox detection threshold')
 
90
  num_people = gr.Radio(
91
  choices=["Single person", "Multiple people"],
92
  value="Single person",
@@ -96,7 +97,7 @@ with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
96
  scale=1,)
97
  mesh_as_vertices = gr.Checkbox(
98
  label="Render as mesh",
99
- info="By default, the estimated SMPL-X parameters are rendered as vertices for faster visualization. Check this option if you want to visualize meshes instead.",
100
  interactive=True,
101
  scale=1,)
102
  send_button = gr.Button("Infer")
@@ -111,8 +112,9 @@ with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
111
  ['/home/user/app/assets/01.jpg'],
112
  ['/home/user/app/assets/02.jpg'],
113
  ['/home/user/app/assets/03.jpg'],
 
114
  ],
115
- inputs=[image_input, 0.5])
116
 
117
  #demo.queue()
118
  demo.queue().launch(debug=True)
 
32
  inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)
33
  os.system(f'rm -rf {OUT_FOLDER}/*')
34
  multi_person = False if (num_people == "Single person") else True
35
+ vis_img, num_bbox, mmdet_box = inferer.infer(image_input, in_threshold, 0, multi_person, not(render_mesh))
36
 
37
  # cap = cv2.VideoCapture(video_input)
38
  # fps = math.ceil(cap.get(5))
 
67
  # os.system(f'zip -r {save_mesh_file} {save_path_mesh}')
68
  # os.system(f'zip -r {save_smplx_file} {save_path_smplx}')
69
  # yield img, video_path, save_mesh_file, save_smplx_file
70
+ return vis_img, "bbox num: {}, bbox meta: {}".format(num_bbox, mmdet_box)
71
 
72
  TITLE = '''<h1 align="center">PostoMETRO: Pose Token Enhanced Mesh Transformer for Robust 3D Human Mesh Recovery</h1>'''
73
  DESCRIPTION = '''
 
86
  with gr.Row():
87
  with gr.Column():
88
  image_input = gr.Image(label="Input image", elem_classes="Image")
89
+ threshold = gr.Slider(0, 1.0, value=0.2, label='BBox detection threshold',
90
+ info="PostoMETRO will take in cropped bboxes as input to produce human mesh. A larger threshold will prevent redundant bboxes, while a smaller one will detect more candidates.")
91
  num_people = gr.Radio(
92
  choices=["Single person", "Multiple people"],
93
  value="Single person",
 
97
  scale=1,)
98
  mesh_as_vertices = gr.Checkbox(
99
  label="Render as mesh",
100
+ info="By default, the estimated SMPL parameters are rendered as vertices for faster visualization. Check this option if you want to visualize meshes instead.",
101
  interactive=True,
102
  scale=1,)
103
  send_button = gr.Button("Infer")
 
112
  ['/home/user/app/assets/01.jpg'],
113
  ['/home/user/app/assets/02.jpg'],
114
  ['/home/user/app/assets/03.jpg'],
115
+ ['/home/user/app/assets/04.jpg'],
116
  ],
117
+ inputs=[image_input, 0.2])
118
 
119
  #demo.queue()
120
  demo.queue().launch(debug=True)
assets/04.jpg ADDED
main/inference.py CHANGED
@@ -142,5 +142,5 @@ class Inferer:
142
  # vis_img = render_mesh(vis_img, mesh, smpl_x.face, {'focal': focal, 'princpt': princpt},
143
  # mesh_as_vertices=mesh_as_vertices)
144
  # vis_img = vis_img.astype('uint8')
145
- return vis_img, bbox
146
 
 
142
  # vis_img = render_mesh(vis_img, mesh, smpl_x.face, {'focal': focal, 'princpt': princpt},
143
  # mesh_as_vertices=mesh_as_vertices)
144
  # vis_img = vis_img.astype('uint8')
145
+ return vis_img, num_bbox, mmdet_box
146