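# Gradio app: 3D human digitization with PIFuHD, from a single image or,
# frame by frame, from a video.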
import gradio as gr
import numpy as np
import cv2
from PIL import Image
from PIFuHD.data import EvalWMetaDataset
from PIFuHD.data.ImageBundle import ImageBundle
from PIFuHD.options import BaseOptions
from PIFuHD.recontructor import Reconstructor
from huggingface_hub import hf_hub_download
from human_pose_estimator import PoseEstimator
from estimator import rect
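
# 2D pose estimation runs on CPU; the pretrained PIFuHD checkpoint is
# downloaded from the Hugging Face Hub on startup.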
REPO_ID = "cxeep/PIFuHD"
pose_estimator = PoseEstimator("cpu")
checkpoint_path = hf_hub_download(repo_id=REPO_ID, filename="pifuhd.pt")
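
# CLI-style evaluation options forwarded to PIFuHD (data/results paths,
# input load size, reconstruction resolution, checkpoint path).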
cmd = [
    '--dataroot', './data',
    '--results_path', './results',
    '--loadSize', '1024',
    '--resolution', '256',
    '--load_netMR_checkpoint_path', checkpoint_path,
    '--start_id', '-1',
    '--end_id', '-1'
]
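
# Parse the flag list with PIFuHD's BaseOptions and build the reconstructor.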
parser = BaseOptions()
opts = parser.parse(cmd)
reconstructor = Reconstructor(opts)
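

# Crop the detected person with the pose estimator and wrap the image plus
# crop metadata into the ImageBundle that PIFuHD's eval dataset expects.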
def make_bundle(image, name):
    image, rects = rect(pose_estimator, image)
    return ImageBundle(img=image, name=name, meta=rects)
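

# Decode the video into RGB frames (OpenCV returns BGR) and reconstruct a
# model per frame.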
def process_video(video_path):
    frames = []
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(Image.fromarray(frame))
    cap.release()
    models = []
    for frame in frames:
        bundle = make_bundle(np.array(frame), "Model3D")
        dataset = EvalWMetaDataset(opts, [bundle])
        model = reconstructor.evaluate(dataset)
        models.append(model)
    # TODO: Combine models into animation
    output_animation = models[0]  # Placeholder, replace with actual animation
    return output_animation
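

# Route to video or single-image reconstruction; the video input takes
# precedence when both are provided.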
def predict(input_img, input_video):
    if input_video:
        return process_video(input_video)
    elif input_img is not None:
        bundle = make_bundle(input_img, "Model3D")
        dataset = EvalWMetaDataset(opts, [bundle])
        return reconstructor.evaluate(dataset)
    else:
        return None, None


footer = r"""
"""
with gr.Blocks(title="PIFuHD") as app:
    gr.HTML("<center><h1>3D Human Digitization</h1></center>")
    gr.HTML("<center><h3>Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Models</h3></center>")
    with gr.Row(equal_height=False):
        with gr.Column():
            input_img = gr.Image(type="numpy", label="Input image")
            input_video = gr.Video(label="Input Video")
            run_btn = gr.Button(variant="primary")
        with gr.Column():
            output_obj = gr.Model3D(label="Output model")
            output_img = gr.Image(type="filepath", label="Output image")
            gr.ClearButton(components=[input_img, input_video, output_img, output_obj], variant="stop")
    run_btn.click(predict, [input_img, input_video], [output_img, output_obj])
    with gr.Row():
        blobs = [[f"examples/{x:02d}.png"] for x in range(1, 4)]
        examples = gr.Dataset(components=[input_img], samples=blobs)
        examples.click(lambda x: x[0], [examples], [input_img])
    with gr.Row():
        gr.HTML(footer)

# Enable request queuing before launching: launch(debug=True) blocks, so a
# queue() call placed after it would never take effect.
app.queue()
app.launch(share=False, debug=True, show_error=True)