import os
import sys
import subprocess
import pkg_resources
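# mmcv, pytorch3d and the MultiScaleDeformableAttention ops ship compiled
# extensions; if they are not already installed in the Space image, build them
# from the bundled sources on first launch.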
def is_package_installed(package_name):
    try:
        pkg_resources.get_distribution(package_name)
        return True
    except pkg_resources.DistributionNotFound:
        return False
if is_package_installed("mmcv"):
    print("MMCV is installed.")
else:
    print("MMCV is not installed. Building it from source.")
    # MMCV_WITH_OPS=1 compiles mmcv's C++/CUDA ops during the editable install.
    os.environ["MMCV_WITH_OPS"] = "1"
    os.environ["FORCE_MLU"] = "1"
    subprocess.run(["pip", "install", "-e", "./mmcv"], check=True)
    subprocess.run(["pip", "list"], check=True)
if is_package_installed("pytorch3d"):
    print("pytorch3d is installed.")
else:
    print("pytorch3d is not installed. Building it from source.")
    subprocess.run(["pip", "install", "-e", "./pytorch3d"], check=True)
if is_package_installed("MultiScaleDeformableAttention"):
    print("MultiScaleDeformableAttention is installed.")
else:
    print("MultiScaleDeformableAttention is not installed. Building it from source.")
    subprocess.run(["pip", "install", "-e", "./models/aios/ops"], check=True)
import os.path as osp
from pathlib import Path
import cv2
import gradio as gr
import torch
import math
import spaces
from huggingface_hub import hf_hub_download
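# Download the AiOS checkpoint from the Hugging Face Hub into the Space's
# pretrained_models directory.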
hf_hub_download(repo_id="ttxskk/AiOS", filename="aios_checkpoint.pth", local_dir="/home/user/app/pretrained_models")
OUT_FOLDER = '/home/user/app/demo_out'
os.makedirs(OUT_FOLDER, exist_ok=True)
DEMO_CONFIG = '/home/user/app/config/aios_smplx_demo.py'
MODEL_PATH = '/home/user/app/pretrained_models/aios_checkpoint.pth'
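# On Hugging Face Spaces, the `spaces.GPU` decorator below requests GPU time
# for each call to `infer`; `duration=300` caps a single run at 300 seconds.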
@spaces.GPU(enable_queue=True, duration=300)
def infer(video_input, batch_size, threshold=0.3, num_person=1):
    # Clear results from any previous run.
    os.system(f'rm -rf {OUT_FOLDER}/*')
    # Run AiOS inference on the input video via torchrun.
    os.system(f'torchrun --nproc_per_node 1 \
        main.py \
        -c {DEMO_CONFIG} \
        --options batch_size={batch_size} backbone="resnet50" num_person={num_person} threshold={threshold} \
        --resume {MODEL_PATH} \
        --eval \
        --inference \
        --inference_input {video_input} \
        --to_vid \
        --output_dir {OUT_FOLDER}')
    video_path = os.path.join(OUT_FOLDER, 'demo_vid.mp4')
    save_path_img = os.path.join(OUT_FOLDER, 'res_img')
    save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
    save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
    # Bundle the per-frame meshes into a single zip archive for download.
    os.system(f'zip -r {save_mesh_file} {save_path_mesh}')
    yield video_path, save_mesh_file
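# Hypothetical local usage (not part of the Gradio flow): `infer` is a
# generator, so iterate it to obtain the rendered video and the zipped meshes.
# for vid_path, mesh_zip in infer('./assets/01.mp4', batch_size=8,
#                                 threshold=0.3, num_person=1):
#     print(vid_path, mesh_zip)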
TITLE = """
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<div>
<h1 align="center">AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation</h1>
</div>
</div>
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<div style="display:flex; gap: 0.25rem;" align="center">
<a href="https://ttxskk.github.io/AiOS/" target="_blank"><img src='https://img.shields.io/badge/Project-Page-Green'></a>
<a href="https://github.com/ttxskk/AiOS" target="_blank"><img src='https://img.shields.io/badge/Github-Code-blue'></a>
<a href="https://ttxskk.github.io/AiOS/assets/aios_cvpr24.pdf" target="_blank"><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
</div>
</div>
<div style="font-size: 1.1rem; color: #555; max-width: 800px; margin: 1rem auto; line-height: 1.5; justify-content: center; align-items: center; text-align: center;">
<div>
<p>Recovers the expressive pose and shape of multiple humans from an RGB image without any additional requirements, such as an off-the-shelf detection model.</p>
</div>
</div>
"""
VIDEO = '''
<center>
<iframe width="960" height="540"
src="https://www.youtube.com/embed/yzCL7TYpzvc?si=EoxWNE6VPBxsy7Go"
title="AiOS: All-in-One-Stage Expressive Human Pose and Shape Estimation"
frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
</center><br/>
'''
DESCRIPTION = '''
<p>
Note: the score threshold sets the minimum confidence for person detection (default 0.3).
Detections whose confidence falls below this threshold are discarded.
</p>
'''
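# Gradio UI: video input with batch-size / number-of-person / score-threshold
# controls on top, the rendered result video and downloadable meshes below.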
with gr.Blocks(title="AiOS", theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray")) as demo:
    gr.Markdown(TITLE)
    gr.HTML(VIDEO)
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column(scale=2):
            video_input = gr.Video(label="Input video", elem_classes="video")
        with gr.Column(scale=1):
            batch_size = gr.Textbox(label="Batch Size", type="text", value=8)
            num_person = gr.Textbox(label="Number of Person", type="text", value=1)
            threshold = gr.Slider(0, 1.0, value=0.3, label='Score Threshold')
            send_button = gr.Button("Infer")
    gr.HTML("""<br/>""")
    with gr.Row():
        with gr.Column():
            # processed_frames = gr.Image(label="Last processed frame")
            video_output = gr.Video(elem_classes="video")
        with gr.Column():
            meshes_output = gr.File(label="3D meshes")
    send_button.click(fn=infer, inputs=[video_input, batch_size, threshold, num_person], outputs=[video_output, meshes_output])
# example_videos = gr.Examples([
# ['./assets/01.mp4'],
# ['./assets/02.mp4'],
# ['./assets/03.mp4'],
# ['./assets/04.mp4'],
# ['./assets/05.mp4'],
# ['./assets/06.mp4'],
# ['./assets/07.mp4'],
# ['./assets/08.mp4'],
# ['./assets/09.mp4'],
# ],
# inputs=[video_input, 0.5])
demo.queue().launch(debug=True)