File size: 4,379 Bytes
63f899c 59d9186 63f899c 59d9186 63f899c 59d9186 a6075c0 63f899c 8c79f6a a6075c0 59d9186 63f899c 59d9186 751c5b7 59d9186 63f899c 59d9186 63f899c 59d9186 63f899c a6075c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import os
import shutil
from huggingface_hub import snapshot_download
import gradio as gr
os.chdir(os.path.dirname(os.path.abspath(__file__)))
from scripts.inference import inference_process
import argparse
import uuid
# Fetch the "fudan-generative-ai/hallo" repository snapshot from the Hugging
# Face Hub into ./pretrained_models (relative to the current working
# directory, which the os.chdir call above sets to this script's directory).
hallo_dir = snapshot_download(
    local_dir="pretrained_models",
    repo_id="fudan-generative-ai/hallo",
)
# True when SPACE_ID names the "multimodalart/hallo" Space, in which case the
# UI shows a "duplicate this Space" banner and refuses to run inference.
# os.environ.get() is used so that running outside Hugging Face Spaces
# (where SPACE_ID is not set) does not crash with KeyError at import time.
is_shared_ui = "multimodalart/hallo" in os.environ.get("SPACE_ID", "")
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Run the Hallo inference pipeline and return the generated video path.

    Args:
        source_image: Path to the portrait image to animate (the UI passes a
            filepath, or None when nothing was uploaded).
        driving_audio: Path to the audio file that drives the animation (a
            filepath, or None when nothing was uploaded).
        progress: Gradio progress tracker. It is not used directly in the
            body; it is declared with ``track_tqdm=True`` so that Gradio can
            surface tqdm progress in the UI (see the Gradio Progress docs).

    Returns:
        The relative path of the generated ``output-<uuid>.mp4`` file.

    Raises:
        gr.Error: If this is the shared (non-duplicated) Space, or if either
            input is missing.
    """
    if is_shared_ui:
        raise gr.Error("This Space only works in duplicated instances")

    # Fail early with a readable message instead of letting the pipeline
    # crash on a None path when the user clicks "Generate" without inputs.
    if not source_image or not driving_audio:
        raise gr.Error("Please provide both a face image and a driving audio file")

    # A random UUID keeps concurrent requests from overwriting each other's output.
    output_path = f'output-{uuid.uuid4()}.mp4'

    # inference_process takes an argparse-style namespace, so build one with
    # the same fields here.
    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None,
    )
    inference_process(args)
    return output_path
# Custom stylesheet handed to gr.Blocks(css=...) below. It styles two banner
# containers selected by element id: "warning-duplicate" (the id given to the
# "duplicate this Space" notice further down) and "warning-ready" (no element
# with that id is created in the visible part of this file).
css = '''
div#warning-ready {
background-color: #ecfdf5;
padding: 0 16px 16px;
margin: 20px 0;
color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
color: #057857!important;
}
div#warning-duplicate {
background-color: #ebf5ff;
padding: 0 16px 16px;
margin: 20px 0;
color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
color: #0f4592!important;
}
div#warning-duplicate strong {
color: #0f4592;
}
p.actions {
display: flex;
align-items: center;
margin: 20px 0;
}
div#warning-duplicate .actions a {
display: inline-block;
margin-right: 10px;
}
'''
# Build the Gradio UI: an optional "duplicate this Space" banner, a title and
# description, an input column (face image, driving audio, Generate button)
# and an output column (the generated video).
with gr.Blocks(css=css) as demo:
    if is_shared_ui:
        # Only shown on the shared Space. is_shared_ui is derived from
        # SPACE_ID, so the variable is set whenever this branch runs.
        top_description = gr.HTML(f'''
<div class="gr-prose">
<h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
Attention: this Space need to be duplicated to work</h2>
<p class="main-message custom-color">
To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU.<br />
An L4 costs <strong>US$0.80/h</strong>, so it should cost ~US$0.08 to generate 10s of talking head avatar.
</p>
<p class="actions custom-color">
<a href="https://huggingface.co./spaces/{os.environ['SPACE_ID']}?duplicate=true">
<img src="https://huggingface.co./datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
</a>
to start generate your talking head
</p>
</div>
''', elem_id="warning-duplicate")

    gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
    gr.Markdown("Generate talking head avatars driven from audio. **every 10 seconds of generation takes ~1 minute** - duplicate the space for private use or try for free on Google Colab")

    with gr.Row():
        with gr.Column():
            # type="filepath" makes both components hand file paths to the
            # callback.
            avatar_face = gr.Image(type="filepath", label="Face")
            driving_audio = gr.Audio(type="filepath", label="Driving audio")
            generate = gr.Button("Generate")
        with gr.Column():
            output_video = gr.Video(label="Your talking head")

    # Wire the button to the inference callback. The keyword must be spelled
    # `inputs`; the previous `intputs` raised TypeError while building the UI.
    generate.click(
        fn=run_inference,
        inputs=[avatar_face, driving_audio],
        outputs=output_video,
    )

demo.launch()