import gradio as gr
import numpy as np
import cv2
from PIL import Image
from PIFuHD.data import EvalWMetaDataset
from PIFuHD.data.ImageBundle import ImageBundle
from PIFuHD.options import BaseOptions
from PIFuHD.recontructor import Reconstructor  # sic: module name is spelled this way upstream
from huggingface_hub import hf_hub_download
from human_pose_estimator import PoseEstimator
from estimator import rect

REPO_ID = "cxeep/PIFuHD"

# Pose estimator used to locate the person and crop a region around them.
pose_estimator = PoseEstimator("cpu")

# Download the pretrained PIFuHD checkpoint from the Hugging Face Hub.
checkpoint_path = hf_hub_download(repo_id=REPO_ID, filename="pifuhd.pt")

cmd = [
    '--dataroot', './data',
    '--results_path', './results',
    '--loadSize', '1024',
    '--resolution', '256',
    '--load_netMR_checkpoint_path', checkpoint_path,
    '--start_id', '-1',
    '--end_id', '-1',
]

parser = BaseOptions()
opts = parser.parse(cmd)
reconstructor = Reconstructor(opts)


def make_bundle(image, name):
    """Crop the image around the detected person and wrap it for evaluation."""
    image, rects = rect(pose_estimator, image)
    return ImageBundle(img=image, name=name, meta=rects)


def process_video(video_path):
    """Reconstruct a 3D model for every frame of the input video."""
    frames = []
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # OpenCV decodes frames as BGR; convert to RGB for the model.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(Image.fromarray(frame))
    cap.release()

    models = []
    for frame in frames:
        bundle = make_bundle(np.array(frame), "Model3D")
        dataset = EvalWMetaDataset(opts, [bundle])
        models.append(reconstructor.evaluate(dataset))

    if not models:  # no readable frames in the video
        return None, None

    # TODO: combine the per-frame models into an animation
    # (see the sketch at the end of this file).
    return models[0]  # placeholder: return the first frame's result


def predict(input_img, input_video):
    """Run reconstruction on the video if one is given, otherwise on the image."""
    if input_video:
        return process_video(input_video)
    if input_img is not None:
        bundle = make_bundle(input_img, "Model3D")
        dataset = EvalWMetaDataset(opts, [bundle])
        return reconstructor.evaluate(dataset)
    return None, None


footer = r"""
"""

with gr.Blocks(title="PIFuHD") as app:
    gr.HTML("""
    <div>
        3D Human Digitization
    </div>
    """)
    gr.HTML("""
    <div>
        Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Models
    </div>
    """)
    with gr.Row(equal_height=False):
        with gr.Column():
            input_img = gr.Image(type="numpy", label="Input image")
            input_video = gr.Video(label="Input Video")
            run_btn = gr.Button(variant="primary")
        with gr.Column():
            output_obj = gr.Model3D(label="Output model")
            output_img = gr.Image(type="filepath", label="Output image")
            gr.ClearButton(components=[input_img, input_video, output_img, output_obj],
                           variant="stop")
    run_btn.click(predict, [input_img, input_video], [output_img, output_obj])
    with gr.Row():
        blobs = [[f"examples/{x:02d}.png"] for x in range(1, 4)]
        examples = gr.Dataset(components=[input_img], samples=blobs)
        examples.click(lambda x: x[0], [examples], [input_img])
    with gr.Row():
        gr.HTML(footer)

# queue() must be called before launch() for queuing to take effect.
app.queue()
app.launch(share=False, debug=True, show_error=True)