import gradio as gr
import numpy as np
import cv2
from PIL import Image

from PIFuHD.data import EvalWMetaDataset
from PIFuHD.data.ImageBundle import ImageBundle
from PIFuHD.options import BaseOptions
from PIFuHD.recontructor import Reconstructor  # note: 'recontructor' is the module name as spelled in the PIFuHD package
from huggingface_hub import hf_hub_download
from human_pose_estimator import PoseEstimator
from estimator import rect
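# The pose estimator locates the person so the input can be cropped before
# reconstruction; the pretrained PIFuHD checkpoint is fetched from the
# Hugging Face Hub (hf_hub_download caches the file and returns its local path).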
REPO_ID = "cxeep/PIFuHD"

pose_estimator = PoseEstimator("cpu")
checkpoint_path = hf_hub_download(repo_id=REPO_ID, filename="pifuhd.pt")
# Build the reconstruction options the same way the PIFuHD CLI would parse them.
cmd = [
    '--dataroot', './data',
    '--results_path', './results',
    '--loadSize', '1024',
    '--resolution', '256',
    '--load_netMR_checkpoint_path', checkpoint_path,
    '--start_id', '-1',
    '--end_id', '-1'
]

parser = BaseOptions()
opts = parser.parse(cmd)
reconstructor = Reconstructor(opts)
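# Assumption, inferred from how the outputs are wired up below:
# reconstructor.evaluate returns a (preview_image_path, model_path) pair
# for a single-item dataset.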
def make_bundle(image, name):
    # Detect the person, crop to the bounding rects, and pack the image
    # plus metadata into the bundle format PIFuHD expects.
    image, rects = rect(pose_estimator, image)
    return ImageBundle(img=image, name=name, meta=rects)
def process_video(video_path):
    # Decode the video into RGB PIL frames (OpenCV reads frames as BGR).
    frames = []
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(Image.fromarray(frame))
    cap.release()

    # Reconstruct a 3D model for each frame independently.
    models = []
    for frame in frames:
        bundle = make_bundle(np.array(frame), "Model3D")
        dataset = EvalWMetaDataset(opts, [bundle])
        model = reconstructor.evaluate(dataset)
        models.append(model)

    # TODO: Combine models into animation
    output_animation = models[0]  # Placeholder: return the first frame's result
    return output_animation
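# One possible way to realize the TODO above, sketched as an assumption rather
# than part of the upstream app: stitch each frame's rendered preview image
# back into a video with OpenCV. This hypothetical helper assumes
# reconstructor.evaluate returns (image_path, model_path) tuples, as noted
# earlier; the output path and fps are illustrative defaults.
def frames_to_video(frame_results, out_path="./results/preview.mp4", fps=24):
    first = cv2.imread(frame_results[0][0])
    height, width = first.shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
    for image_path, _ in frame_results:
        writer.write(cv2.imread(image_path))
    writer.release()
    return out_path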
def predict(input_img, input_video):
    # A video takes precedence over a still image when both are supplied.
    if input_video:
        return process_video(input_video)
    elif input_img is not None:
        bundle = make_bundle(input_img, "Model3D")
        dataset = EvalWMetaDataset(opts, [bundle])
        return reconstructor.evaluate(dataset)
    else:
        return None, None
footer = r""" | |
""" | |
with gr.Blocks(title="PIFuHD") as app:
    gr.HTML("<center><h1>3D Human Digitization</h1></center>")
    gr.HTML("<center><h3>Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Models</h3></center>")
    with gr.Row(equal_height=False):
        with gr.Column():
            input_img = gr.Image(type="numpy", label="Input image")
            input_video = gr.Video(label="Input Video")
            run_btn = gr.Button("Run", variant="primary")
        with gr.Column():
            output_obj = gr.Model3D(label="Output model")
            output_img = gr.Image(type="filepath", label="Output image")
            gr.ClearButton(components=[input_img, input_video, output_img, output_obj], variant="stop")
    # predict returns an (image_path, model_path) pair, mapped onto the two outputs.
    run_btn.click(predict, [input_img, input_video], [output_img, output_obj])
    with gr.Row():
        blobs = [[f"examples/{x:02d}.png"] for x in range(1, 4)]
        examples = gr.Dataset(components=[input_img], samples=blobs)
        examples.click(lambda x: x[0], [examples], [input_img])
    with gr.Row():
        gr.HTML(footer)

# queue() must be set up before launch(); launch(debug=True) blocks,
# so a queue() call placed after it would never run.
app.queue()
app.launch(share=False, debug=True, show_error=True)