import os
import sys
import subprocess

import pkg_resources


def is_package_installed(package_name):
    """Return True if `package_name` is already installed in the environment."""
    try:
        pkg_resources.get_distribution(package_name)
        return True
    except pkg_resources.DistributionNotFound:
        return False

# Build MMCV (with compiled ops) from source if it is not already installed.
if is_package_installed("mmcv"):
    print("MMCV is installed.")
else:
    print("MMCV is not installed. Building it from source.")
    os.environ["MMCV_WITH_OPS"] = "1"
    os.environ["FORCE_MLU"] = "1"
    subprocess.run(["pip", "install", "-e", "./mmcv"], check=True)
    subprocess.run(["pip", "list"], check=True)

# Build PyTorch3D from source if it is not already installed.
if is_package_installed("pytorch3d"):
    print("pytorch3d is installed.")
else:
    print("pytorch3d is not installed. Building it from source.")
    subprocess.run(["pip", "install", "-e", "./pytorch3d"], check=True)

# Build the multi-scale deformable attention ops from source if they are not already installed.
if is_package_installed("MultiScaleDeformableAttention"):
    print("MultiScaleDeformableAttention is installed.")
else:
    print("MultiScaleDeformableAttention is not installed. Building it from source.")
    subprocess.run(["pip", "install", "-e", "./models/aios/ops"], check=True)
import os.path as osp
from pathlib import Path
import cv2
import gradio as gr
import torch
import math
import spaces
from huggingface_hub import hf_hub_download
# Download the pretrained AiOS checkpoint from the Hugging Face Hub.
hf_hub_download(
    repo_id="ttxskk/AiOS",
    filename="aios_checkpoint.pth",
    local_dir="/home/user/app/pretrained_models",
)
OUT_FOLDER = '/home/user/app/demo_out'
os.makedirs(OUT_FOLDER, exist_ok=True)
DEMO_CONFIG = '/home/user/app/config/aios_smplx_demo.py'
MODEL_PATH = '/home/user/app/pretrained_models/aios_checkpoint.pth'
@spaces.GPU(enable_queue=True, duration=300)
def infer(video_input, batch_size, threshold=0.5, num_person=1):
    # Clear the results of any previous run.
    os.system(f'rm -rf {OUT_FOLDER}/*')
    # Run AiOS inference on the input video via torchrun.
    os.system(f'torchrun --nproc_per_node 1 \
        main.py \
        -c {DEMO_CONFIG} \
        --options batch_size={batch_size} backbone="resnet50" num_person={num_person} threshold={threshold} \
        --resume {MODEL_PATH} \
        --eval \
        --inference \
        --inference_input {video_input} \
        --to_vid \
        --output_dir {OUT_FOLDER}')
    video_path = os.path.join(OUT_FOLDER, 'demo_vid.mp4')
    save_path_img = os.path.join(OUT_FOLDER, 'res_img')
    save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
    save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
    # Bundle the exported meshes into a zip archive for download.
    os.system(f'zip -r {save_mesh_file} {save_path_mesh}')
    yield video_path, save_mesh_file
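
# `infer` is a generator, so Gradio streams (video_path, mesh_zip) to the UI as
# results become available. A minimal local usage sketch (the input path is an
# assumption, not part of the original demo):
#
#     for video_path, mesh_zip in infer('./assets/01.mp4', batch_size=8):
#         print(video_path, mesh_zip)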
TITLE = """
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<div>
<h1 align="center">AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation</h1>
</div>
</div>
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<div style="display:flex; gap: 0.25rem;" align="center">
<a href="https://ttxskk.github.io/AiOS/" target="_blank"><img src='https://img.shields.io/badge/Project-Page-Green'></a>
<a href="https://github.com/ttxskk/AiOS" target="_blank"><img src='https://img.shields.io/badge/Github-Code-blue'></a>
<a href="https://ttxskk.github.io/AiOS/assets/aios_cvpr24.pdf" target="_blank"><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
</div>
</div>
<div style="font-size: 1.1rem; color: #555; max-width: 800px; margin: 1rem auto; line-height: 1.5; justify-content: center; align-items: center; text-align: center;">
<div>
<p>Recover expressive human pose and shape for multiple people from an RGB image, without any additional requirements such as an off-the-shelf detection model.</p>
</div>
</div>
"""
VIDEO = '''
<center><iframe width="960" height="540"
src="https://www.youtube.com/embed/yzCL7TYpzvc?si=EoxWNE6VPBxsy7Go"title="AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen>
</iframe>
</center><br>'''
DESCRIPTION = '''
<p>
Note: Detections whose score falls below the score threshold are discarded. The default threshold is 0.5.
</p>
'''
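# For reference, the thresholding described above amounts to keeping only the
# detections whose confidence is at least `threshold`; a minimal sketch (the
# actual filtering happens inside main.py, which receives threshold via --options):
#
#     kept = [det for det in detections if det['score'] >= threshold]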
with gr.Blocks(title="AiOS", theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray")) as demo:
    gr.Markdown(TITLE)
    with gr.Row():
        with gr.Column(scale=2):
            video_input = gr.Video(label="Input video", elem_classes="video")
        with gr.Column(scale=1):
            batch_size = gr.Textbox(label="Batch Size", type="text", value=8)
            num_person = gr.Textbox(label="Number of Person", type="text", value=1)
            threshold = gr.Slider(0, 1.0, value=0.5, label='Score Threshold')
            send_button = gr.Button("Infer")
    gr.HTML("""<br/>""")
    with gr.Row():
        with gr.Column():
            # processed_frames = gr.Image(label="Last processed frame")
            video_output = gr.Video(elem_classes="video")
        with gr.Column():
            meshes_output = gr.File(label="3D meshes")
    send_button.click(fn=infer, inputs=[video_input, batch_size, threshold, num_person], outputs=[video_output, meshes_output])
    # example_videos = gr.Examples([
    #     ['./assets/01.mp4'],
    #     ['./assets/02.mp4'],
    #     ['./assets/03.mp4'],
    #     ['./assets/04.mp4'],
    #     ['./assets/05.mp4'],
    #     ['./assets/06.mp4'],
    #     ['./assets/07.mp4'],
    #     ['./assets/08.mp4'],
    #     ['./assets/09.mp4'],
    # ],
    # inputs=[video_input, 0.5])
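
# Queuing is required for generator functions like `infer` to stream their outputs.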
demo.queue().launch(debug=True)