fix: retargeting feature leakage

#7 · opened by zzz66
.gitattributes CHANGED
@@ -47,12 +47,3 @@ pretrained_weights/liveportrait/base_models/warping_module.pth filter=lfs diff=l
 pretrained_weights/insightface/models/buffalo_l/2d106det.onnx filter=lfs diff=lfs merge=lfs -text
 pretrained_weights/insightface/models/buffalo_l/det_10g.onnx filter=lfs diff=lfs merge=lfs -text
 pretrained_weights/liveportrait/landmark.onnx filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d14.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/source/s12.jpg filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d14_trim.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d6_trim.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d15.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d16.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d18.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d19.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/source/s22.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -7,7 +7,6 @@ sdk: gradio
 sdk_version: 4.37.2
 app_file: app.py
 pinned: false
-disable_embedding: true
 tags:
 - Multimodal
 - Motion control
@@ -18,4 +17,4 @@ tags:
 short_description: Apply the motion of a video on a portrait
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -46,14 +46,14 @@ def gpu_wrapped_execute_image(*args, **kwargs):
 
 def is_square_video(video_path):
     video = cv2.VideoCapture(video_path)
-
+
     width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
+
     video.release()
     if width != height:
         raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")
-
+
     return gr.update(visible=True)
 
 # assets
@@ -63,10 +63,9 @@ example_video_dir = "assets/examples/driving"
 data_examples = [
     [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
     [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
-    [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
-    [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, True],
-    [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, True],
-    [osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+    [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d5.mp4"), True, True, True, True],
+    [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d6.mp4"), True, True, True, True],
+    [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d7.mp4"), True, True, True, True],
 ]
 #################### interface logic ####################
 
@@ -92,8 +91,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 [osp.join(example_portrait_dir, "s10.jpg")],
                 [osp.join(example_portrait_dir, "s5.jpg")],
                 [osp.join(example_portrait_dir, "s7.jpg")],
-                [osp.join(example_portrait_dir, "s12.jpg")],
-                [osp.join(example_portrait_dir, "s22.jpg")],
             ],
             inputs=[image_input],
             cache_examples=False,
@@ -103,10 +100,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Examples(
             examples=[
                 [osp.join(example_video_dir, "d0.mp4")],
-                [osp.join(example_video_dir, "d18.mp4")],
-                [osp.join(example_video_dir, "d19.mp4")],
-                [osp.join(example_video_dir, "d14_trim.mp4")],
-                [osp.join(example_video_dir, "d6_trim.mp4")],
+                [osp.join(example_video_dir, "d5.mp4")],
+                [osp.join(example_video_dir, "d6.mp4")],
+                [osp.join(example_video_dir, "d7.mp4")],
             ],
             inputs=[video_input],
             cache_examples=False,
@@ -118,7 +114,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         flag_relative_input = gr.Checkbox(value=True, label="relative motion")
         flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
         flag_remap_input = gr.Checkbox(value=True, label="paste-back")
-    gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
     with gr.Row():
         with gr.Column():
             process_button_animation = gr.Button("🚀 Animate", variant="primary")
@@ -133,7 +128,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             output_video_concat.render()
    with gr.Row():
        # Examples
-        gr.Markdown("## You could also choose the examples below by one click ⬇️")
+        gr.Markdown("## You could choose the examples below ⬇️")
    with gr.Row():
        gr.Examples(
            examples=data_examples,
@@ -146,7 +141,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                flag_remap_input
            ],
            outputs=[output_image, output_image_paste_back],
-            examples_per_page=6,
+            examples_per_page=5,
            cache_examples=False,
        )
    gr.Markdown(load_description("assets/gradio_description_retargeting.md"), visible=True)
@@ -176,8 +171,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                [osp.join(example_portrait_dir, "s10.jpg")],
                [osp.join(example_portrait_dir, "s5.jpg")],
                [osp.join(example_portrait_dir, "s7.jpg")],
-                [osp.join(example_portrait_dir, "s12.jpg")],
-                [osp.join(example_portrait_dir, "s22.jpg")],
            ],
            inputs=[retargeting_input_image],
            cache_examples=False,
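
For context on the `is_square_video` hunk above: the helper is intended to run as a validation callback when a driving video is provided, raising a `gr.Error` for non-square input and otherwise revealing a component. The event wiring itself is not part of this diff; the following is only a minimal sketch of how such a check is typically attached in Gradio, with the component names (`video_input`, `process_button_animation`) assumed rather than taken from this PR:

```python
# Sketch only: hypothetical wiring of the square-video check to a Gradio
# video input. The actual event binding in app.py is outside this diff.
import cv2
import gradio as gr


def is_square_video(video_path):
    # same logic as the hunk above: reject non-square driving videos
    video = cv2.VideoCapture(video_path)
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video.release()
    if width != height:
        raise gr.Error("the video does not have a square aspect ratio")
    return gr.update(visible=True)


with gr.Blocks() as demo:
    video_input = gr.Video(label="Driving Video")                      # assumed name
    process_button_animation = gr.Button("🚀 Animate", visible=False)  # assumed name
    # run the aspect-ratio check whenever the driving video changes and
    # only reveal the animate button when the check passes
    video_input.change(
        fn=is_square_video,
        inputs=video_input,
        outputs=process_button_animation,
    )

if __name__ == "__main__":
    demo.launch()
```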
assets/examples/driving/d1.mp4 ADDED
Binary file (48.8 kB)

assets/examples/driving/d14.mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:465e72fbf26bf4ed46d1adf7aab8a7344aac54a2f92c4d82a1d53127f0170472
-size 891025

assets/examples/driving/d14_trim.mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1ddeda3ad54627d893afcbef9ca09d4e6b7b510d6c10407ce89d10f1b0e1cd16
-size 433589

assets/examples/driving/d15.mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7ff1f84228c8db9eee09b28372ddfc4d5752d779860fdb882287d8c2edcf99d4
-size 105285

assets/examples/driving/d16.mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:853639a403c0285d8073ffd3aa3b80fb52b351f3a720785ce799694d6ab63a16
-size 68369

assets/examples/driving/d18.mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1dc94c1fec7ef7dc831c8a49f0e1788ae568812cb68e62f6875d9070f573d02a
-size 187263

assets/examples/driving/d19.mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3047ba66296d96b8a4584e412e61493d7bc0fa5149c77b130e7feea375e698bd
-size 232859

assets/examples/driving/d2.mp4 ADDED
Binary file (47.8 kB)

assets/examples/driving/d5.mp4 ADDED
Binary file (135 kB)

assets/examples/driving/d6_trim.mp4 DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:de9e15eef87674433f2a60972da65e42b55fa154df7beaf4e0ee1cea1939774b
-size 530752

assets/examples/driving/d7.mp4 ADDED
Binary file (185 kB)

assets/examples/driving/d8.mp4 ADDED
Binary file (312 kB)
assets/examples/source/s12.jpg DELETED

Git LFS Details

  • SHA256: c3122ed04b2a73e02fd21cb80a7119ef9dedc7ae988f80c2ea20947c6292f3ea
  • Pointer size: 130 Bytes
  • Size of remote file: 50 kB
assets/examples/source/s22.jpg DELETED

Git LFS Details

  • SHA256: 113d76fe4c3693916cde74e0f1250b516a9ea6b679c28cbd910b46a6c090cc62
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
assets/gradio_description_animate_clear.md DELETED
@@ -1,3 +0,0 @@
-<div style="font-size: 1.2em; text-align: center;">
-  Step 3: Click the <strong>🚀 Animate</strong> button below to generate, or click 🧹 Clear to erase the results
-</div>
assets/gradio_description_retargeting.md CHANGED
@@ -1,13 +1 @@
-<br>
-
-<!-- ## Retargeting
-<span style="font-size: 1.2em;">🔥 To edit the eyes and lip open ratio of the source portrait, drag the sliders and click the <strong>🚗 Retargeting</strong> button. You can try running it multiple times. <strong>😊 Set both ratios to 0.8 to see what's going on!</strong> </span> -->
-
-<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 1.2em;">
-  <div>
-    <h2>Retargeting</h2>
-    <p>Upload a Source Portrait as Retargeting Input, then drag the sliders and click the <strong>🚗 Retargeting</strong> button. You can try running it multiple times.
-    <br>
-    <strong>😊 Set both ratios to 0.8 to see what's going on!</strong></p>
-  </div>
-</div>
+<span style="font-size: 1.2em;">🔥 To change the eyes and lip open ratio of the source portrait, please drag the sliders and then click the <strong>🚗 Retargeting</strong> button. The result would be shown in the blocks. You can try running it multiple times. <strong>😊 Set both ratios to 0.8 to see what's going on!</strong> </span>
assets/gradio_description_upload.md CHANGED
@@ -1,30 +1,2 @@
-<!-- ## 🤗 This is the official gradio demo for LivePortrait. -->
-<!-- <div style="font-size: 1.0em;">
-  If you find LivePortrait fun 🤪 or useful, please consider starring 🌟 our <a href="https://github.com/KwaiVGI/LivePortrait">GitHub Repo</a> to discover more features!
-</div>
-
-
-<!-- <div style="font-size: 1.2em;">
-  Step1: upload or use a webcam to get a <strong>Source Portrait</strong> (any aspect ratio) to left side.<br>
-  Step2: upload a <strong>Driving Video</strong> (1:1 aspect ratio) to right side.
-</div> -->
-
-<br>
-<div style="font-size: 1.2em; display: flex; justify-content: space-between;">
-  <div style="flex: 1; text-align: center; margin-right: 20px;">
-    <div style="display: inline-block;">
-      Step 1: Upload a <strong>Source Portrait</strong> (any aspect ratio) ⬇️
-    </div>
-    <div style="display: inline-block; font-size: 0.75em;">
-      <strong>Note:</strong> To upload a source video, <a href="https://github.com/KwaiVGI/LivePortrait?tab=readme-ov-file#4-gradio-interface-"><strong>build Gradio locally</strong></a>. Windows users can use the <a href="https://huggingface.co/cleardusk/LivePortrait-Windows"><strong>one-click package</strong></a>. Animals model <a href="https://github.com/KwaiVGI/LivePortrait/blob/main/assets/docs/changelog/2024-08-02.md"><strong>here</strong></a>.
-    </div>
-  </div>
-  <div style="flex: 1; text-align: center; margin-left: 20px;">
-    <div style="display: inline-block;">
-      Step 2: Upload a <strong>Driving Video</strong> (1:1 aspect ratio) ⬇️
-    </div>
-    <div style="display: inline-block; font-size: 0.75em;">
-      <strong>Tips:</strong> Focus on the head, minimize shoulder movement, <strong>neutral expression</strong> in first frame.
-    </div>
-  </div>
-</div>
+## 🤗 This is the official gradio demo for **LivePortrait**.
+<div style="font-size: 1.2em;">Please upload or use the webcam to get a source portrait to the <strong>Source Portrait</strong> field and a driving video to the <strong>Driving Video</strong> field.</div>
assets/gradio_title.md CHANGED
@@ -1,17 +1,10 @@
 <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
-  <div>
-    <h1>LivePortrait</h1>
-    <span>Add mimics and lip sync to your static portrait driven by a video</span>
-    <br>
-    <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
-      <a href="https://arxiv.org/pdf/2407.03168"><img src="https://img.shields.io/badge/arXiv-2407.03168-red"></a>
-      &nbsp;
-      <a href="https://liveportrait.github.io"><img src="https://img.shields.io/badge/Project_Page-LivePortrait-green" alt="Project Page"></a>
-      &nbsp;
-      <a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
-      &nbsp;
-      <a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/github/stars/KwaiVGI/LivePortrait
-      "></a>
+  <div>
+    <h1>LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control</h1>
+    <div style="display: flex; justify-content: center; align-items: center; text-align: center;>
+      <a href="https://arxiv.org/pdf/2407.03168"><img src="https://img.shields.io/badge/arXiv-2407.03168-red"></a>
+      <a href="https://liveportrait.github.io"><img src="https://img.shields.io/badge/Project_Page-LivePortrait-green" alt="Project Page"></a>
+      <a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
+    </div>
   </div>
-  </div>
 </div>
readme.md CHANGED
@@ -1,12 +1,143 @@
-This is the official Space of the paper: [**LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control**](https://arxiv.org/abs/2407.03168)
+<h1 align="center">LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control</h1>
 
-If you find LivePortrait useful for your research, welcome to cite our work using the following BibTeX:
+<div align='center'>
+    <a href='https://github.com/cleardusk' target='_blank'><strong>Jianzhu Guo</strong></a><sup> 1†</sup>&emsp;
+    <a href='https://github.com/KwaiVGI' target='_blank'><strong>Dingyun Zhang</strong></a><sup> 1,2</sup>&emsp;
+    <a href='https://github.com/KwaiVGI' target='_blank'><strong>Xiaoqiang Liu</strong></a><sup> 1</sup>&emsp;
+    <a href='https://github.com/KwaiVGI' target='_blank'><strong>Zhizhou Zhong</strong></a><sup> 1,3</sup>&emsp;
+    <a href='https://scholar.google.com.hk/citations?user=_8k1ubAAAAAJ' target='_blank'><strong>Yuan Zhang</strong></a><sup> 1</sup>&emsp;
+</div>
+
+<div align='center'>
+    <a href='https://scholar.google.com/citations?user=P6MraaYAAAAJ' target='_blank'><strong>Pengfei Wan</strong></a><sup> 1</sup>&emsp;
+    <a href='https://openreview.net/profile?id=~Di_ZHANG3' target='_blank'><strong>Di Zhang</strong></a><sup> 1</sup>&emsp;
+</div>
+
+<div align='center'>
+    <sup>1 </sup>Kuaishou Technology&emsp; <sup>2 </sup>University of Science and Technology of China&emsp; <sup>3 </sup>Fudan University&emsp;
+</div>
+
+<br>
+<div align="center">
+    <!-- <a href='LICENSE'><img src='https://img.shields.io/badge/license-MIT-yellow'></a> -->
+    <a href='https://liveportrait.github.io'><img src='https://img.shields.io/badge/Project-Homepage-green'></a>
+    <a href='https://arxiv.org/pdf/2407.03168'><img src='https://img.shields.io/badge/Paper-arXiv-red'></a>
+</div>
+<br>
+
+<p align="center">
+  <img src="./assets/docs/showcase2.gif" alt="showcase">
+  <br>
+  🔥 For more results, visit our <a href="https://liveportrait.github.io/"><strong>homepage</strong></a> 🔥
+</p>
+
+
+
+## 🔥 Updates
+- **`2024/07/04`**: 🔥 We released the initial version of the inference code and models. Continuous updates, stay tuned!
+- **`2024/07/04`**: 😊 We released the [homepage](https://liveportrait.github.io) and technical report on [arXiv](https://arxiv.org/pdf/2407.03168).
+
+## Introduction
+This repo, named **LivePortrait**, contains the official PyTorch implementation of our paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
+We are actively updating and improving this repository. If you find any bugs or have suggestions, welcome to raise issues or submit pull requests (PR) 💖.
+
+## 🔥 Getting Started
+### 1. Clone the code and prepare the environment
+```bash
+git clone https://github.com/KwaiVGI/LivePortrait
+cd LivePortrait
+
+# create env using conda
+conda create -n LivePortrait python==3.9.18
+conda activate LivePortrait
+# install dependencies with pip
+pip install -r requirements.txt
+```
+
+### 2. Download pretrained weights
+Download our pretrained LivePortrait weights and face detection models of InsightFace from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
+```text
+pretrained_weights
+├── insightface
+│   └── models
+│       └── buffalo_l
+│           ├── 2d106det.onnx
+│           └── det_10g.onnx
+└── liveportrait
+    ├── base_models
+    │   ├── appearance_feature_extractor.pth
+    │   ├── motion_extractor.pth
+    │   ├── spade_generator.pth
+    │   └── warping_module.pth
+    ├── landmark.onnx
+    └── retargeting_models
+        └── stitching_retargeting_module.pth
+```
+
+### 3. Inference 🚀
+
+```bash
+python inference.py
+```
+
+If the script runs successfully, you will get an output mp4 file named `animations/s6--d0_concat.mp4`. This file includes the following results: driving video, input image, and generated result.
+
+<p align="center">
+  <img src="./assets/docs/inference.gif" alt="image">
+</p>
+
+Or, you can change the input by specifying the `-s` and `-d` arguments:
+
+```bash
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4
+
+# or disable pasting back
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4 --no_flag_pasteback
+
+# more options to see
+python inference.py -h
+```
+
+**More interesting results can be found in our [Homepage](https://liveportrait.github.io)** 😊
+
+### 4. Gradio interface
+
+We also provide a Gradio interface for a better experience, just run by:
+
+```bash
+python app.py
+```
+
+### 5. Inference speed evaluation 🚀🚀🚀
+We have also provided a script to evaluate the inference speed of each module:
+
+```bash
+python speed.py
+```
+
+Below are the results of inferring one frame on an RTX 4090 GPU using the native PyTorch framework with `torch.compile`:
+
+| Model                              | Parameters(M) | Model Size(MB) | Inference(ms) |
+|------------------------------------|:-------------:|:--------------:|:-------------:|
+| Appearance Feature Extractor       |     0.84      |      3.3       |     0.82      |
+| Motion Extractor                   |     28.12     |      108       |     0.84      |
+| Spade Generator                    |     55.37     |      212       |     7.59      |
+| Warping Module                     |     45.53     |      174       |     5.21      |
+| Stitching and Retargeting Modules  |     0.23      |      2.3       |     0.31      |
+
+*Note: the listed values of Stitching and Retargeting Modules represent the combined parameter counts and the total sequential inference time of three MLP networks.*
+
+
+## Acknowledgements
+We would like to thank the contributors of [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), [InsightFace](https://github.com/deepinsight/insightface) repositories, for their open research and contributions.
+
+## Citation 💖
+If you find LivePortrait useful for your research, welcome to 🌟 this repo and cite our work using the following BibTeX:
 ```bibtex
-@article{guo2024liveportrait,
+@article{guo2024live,
   title = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
-  author = {Guo, Jianzhu and Zhang, Dingyun and Liu, Xiaoqiang and Zhong, Zhizhou and Zhang, Yuan and Wan, Pengfei and Zhang, Di},
-  journal = {arXiv preprint arXiv:2407.03168},
-  year = {2024}
+  author = {Jianzhu Guo and Dingyun Zhang and Xiaoqiang Liu and Zhizhou Zhong and Yuan Zhang and Pengfei Wan and Di Zhang},
+  year = {2024},
+  journal = {arXiv preprint:2407.03168},
 }
 ```
-
src/config/argument_config.py CHANGED
@@ -8,7 +8,6 @@ import os.path as osp
 from dataclasses import dataclass
 import tyro
 from typing_extensions import Annotated
-from typing import Optional
 from .base_config import PrintableConfig, make_abs_path
 
 
@@ -42,4 +41,4 @@ class ArgumentConfig(PrintableConfig):
     ########## gradio arguments ##########
     server_port: Annotated[int, tyro.conf.arg(aliases=["-p"])] = 7860
     share: bool = False
-    server_name: Optional[str] = None  # one can set "0.0.0.0" on local
+    server_name: str = None  # one can set "0.0.0.0" on local
src/gradio_pipeline.py CHANGED
@@ -9,7 +9,8 @@ from .live_portrait_pipeline import LivePortraitPipeline
 from .utils.io import load_img_online
 from .utils.rprint import rlog as log
 from .utils.crop import prepare_paste_back, paste_back
-# from .utils.camera import get_rotation_matrix
+from .utils.camera import get_rotation_matrix
+from .utils.retargeting_utils import calc_eye_close_ratio, calc_lip_close_ratio
 
 def update_args(args, user_args):
     """update the args according to user inputs
@@ -33,7 +34,7 @@ class GradioPipeline(LivePortraitPipeline):
         flag_relative_input,
         flag_do_crop_input,
         flag_remap_input,
-    ):
+    ):
         """ for video driven potrait animation
         """
         if input_image_path is not None and input_video_path is not None:
@@ -53,7 +54,7 @@ class GradioPipeline(LivePortraitPipeline):
             # gr.Info("Run successfully!", duration=2)
             return video_path, video_path_concat,
         else:
-            raise gr.Error("Please upload the source portrait and driving video 🤗🤗🤗", duration=5)
+            raise gr.Error("The input source portrait or driving video hasn't been prepared yet 💥!", duration=5)
 
     def execute_image(self, input_eye_ratio: float, input_lip_ratio: float, input_image, flag_do_crop = True):
         """ for single image retargeting
@@ -62,7 +63,7 @@ class GradioPipeline(LivePortraitPipeline):
         f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb = \
             self.prepare_retargeting(input_image, flag_do_crop)
 
-        if input_eye_ratio is None or input_lip_ratio is None:
+        if input_eye_ratio is None or input_eye_ratio is None:
            raise gr.Error("Invalid ratio input 💥!", duration=5)
        else:
            x_s_user = x_s_user.to("cuda")
@@ -91,7 +92,7 @@ class GradioPipeline(LivePortraitPipeline):
         # gr.Info("Upload successfully!", duration=2)
         inference_cfg = self.live_portrait_wrapper.cfg
         ######## process source portrait ########
-        img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=1)  # n=1 means do not trim the pixels
+        img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=16)
         log(f"Load source image from {input_image}.")
         crop_info = self.cropper.crop_single_image(img_rgb)
         if flag_do_crop:
@@ -99,7 +100,7 @@ class GradioPipeline(LivePortraitPipeline):
         else:
             I_s = self.live_portrait_wrapper.prepare_source(img_rgb)
         x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
-        # R_s = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
+        R_s = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
         ############################################
         f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
         x_s_user = self.live_portrait_wrapper.transform_keypoint(x_s_info)
@@ -109,4 +110,5 @@ class GradioPipeline(LivePortraitPipeline):
             return f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb
         else:
             # when press the clear button, go here
-            raise gr.Error("Please upload a source portrait as the retargeting input 🤗🤗🤗", duration=5)
+            raise gr.Error("The retargeting input hasn't been prepared yet 💥!", duration=5)
+
src/live_portrait_pipeline.py CHANGED
@@ -4,13 +4,13 @@
 Pipeline of LivePortrait
 """
 
-import torch
-torch.backends.cudnn.benchmark = True  # disable CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR warning
+# TODO:
+# 1. 当前假定所有的模板都是已经裁好的,需要修改下
+# 2. pick样例图 source + driving
 
 import cv2
 import numpy as np
 import pickle
-import os
 import os.path as osp
 from rich.progress import track
 
@@ -19,7 +19,7 @@ from .config.inference_config import InferenceConfig
 from .config.crop_config import CropConfig
 from .utils.cropper import Cropper
 from .utils.camera import get_rotation_matrix
-from .utils.video import images2video, concat_frames, get_fps, add_audio_to_video, has_audio_stream
+from .utils.video import images2video, concat_frames
 from .utils.crop import _transform_img, prepare_paste_back, paste_back
 from .utils.retargeting_utils import calc_lip_close_ratio
 from .utils.io import load_image_rgb, load_driving_info, resize_to_limit
@@ -68,12 +68,8 @@ class LivePortraitPipeline(object):
         ############################################
 
         ######## process driving info ########
-        output_fps = 30  # default fps
         if is_video(args.driving_info):
             log(f"Load from video file (mp4 mov avi etc...): {args.driving_info}")
-            output_fps = int(get_fps(args.driving_info))
-            log(f'The FPS of {args.driving_info} is: {output_fps}')
-
             # TODO: 这里track一下驱动视频 -> 构建模板
             driving_rgb_lst = load_driving_info(args.driving_info)
             driving_rgb_lst_256 = [cv2.resize(_, (256, 256)) for _ in driving_rgb_lst]
@@ -178,32 +174,17 @@ class LivePortraitPipeline(object):
 
         mkdir(args.output_dir)
         wfp_concat = None
-        flag_has_audio = has_audio_stream(args.driving_info)
-
         if is_video(args.driving_info):
             frames_concatenated = concat_frames(I_p_lst, driving_rgb_lst, img_crop_256x256)
             # save (driving frames, source image, drived frames) result
             wfp_concat = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat.mp4')
-            images2video(frames_concatenated, wfp=wfp_concat, fps=output_fps)
-            if flag_has_audio:
-                # final result with concat
-                wfp_concat_with_audio = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat_with_audio.mp4')
-                add_audio_to_video(wfp_concat, args.driving_info, wfp_concat_with_audio)
-                os.replace(wfp_concat_with_audio, wfp_concat)
-                log(f"Replace {wfp_concat} with {wfp_concat_with_audio}")
+            images2video(frames_concatenated, wfp=wfp_concat)
 
         # save drived result
         wfp = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}.mp4')
         if inference_cfg.flag_pasteback:
-            images2video(I_p_paste_lst, wfp=wfp, fps=output_fps)
+            images2video(I_p_paste_lst, wfp=wfp)
         else:
-            images2video(I_p_lst, wfp=wfp, fps=output_fps)
-
-        ######### build final result #########
-        if flag_has_audio:
-            wfp_with_audio = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_with_audio.mp4')
-            add_audio_to_video(wfp, args.driving_info, wfp_with_audio)
-            os.replace(wfp_with_audio, wfp)
-            log(f"Replace {wfp} with {wfp_with_audio}")
+            images2video(I_p_lst, wfp=wfp)
 
         return wfp, wfp_concat
src/utils/video.py CHANGED
@@ -12,12 +12,11 @@ import cv2
12
 
13
  from rich.progress import track
14
  from .helper import prefix
15
- from .rprint import rlog as log
16
  from .rprint import rprint as print
17
 
18
 
19
  def exec_cmd(cmd):
20
- return subprocess.run(cmd, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
21
 
22
 
23
  def images2video(images, wfp, **kwargs):
@@ -132,72 +131,9 @@ def change_video_fps(input_file, output_file, fps=20, codec='libx264', crf=5):
132
  exec_cmd(cmd)
133
 
134
 
135
- def get_fps(filepath, default_fps=25):
136
- try:
137
- fps = cv2.VideoCapture(filepath).get(cv2.CAP_PROP_FPS)
138
-
139
- if fps in (0, None):
140
- fps = default_fps
141
- except Exception as e:
142
- log(e)
143
- fps = default_fps
144
-
145
  return fps
146
-
147
-
148
- def has_audio_stream(video_path: str) -> bool:
149
- """
150
- Check if the video file contains an audio stream.
151
-
152
- :param video_path: Path to the video file
153
- :return: True if the video contains an audio stream, False otherwise
154
- """
155
- if osp.isdir(video_path):
156
- return False
157
-
158
- cmd = [
159
- 'ffprobe',
160
- '-v', 'error',
161
- '-select_streams', 'a',
162
- '-show_entries', 'stream=codec_type',
163
- '-of', 'default=noprint_wrappers=1:nokey=1',
164
- f'"{video_path}"'
165
- ]
166
-
167
- try:
168
- # result = subprocess.run(cmd, capture_output=True, text=True)
169
- result = exec_cmd(' '.join(cmd))
170
- if result.returncode != 0:
171
- log(f"Error occurred while probing video: {result.stderr}")
172
- return False
173
-
174
- # Check if there is any output from ffprobe command
175
- return bool(result.stdout.strip())
176
- except Exception as e:
177
- log(
178
- f"Error occurred while probing video: {video_path}, "
179
- "you may need to install ffprobe! (https://ffmpeg.org/download.html) "
180
- "Now set audio to false!",
181
- style="bold red"
182
- )
183
- return False
184
-
185
-
186
- def add_audio_to_video(silent_video_path: str, audio_video_path: str, output_video_path: str):
187
- cmd = [
188
- 'ffmpeg',
189
- '-y',
190
- '-i', f'"{silent_video_path}"',
191
- '-i', f'"{audio_video_path}"',
192
- '-map', '0:v',
193
- '-map', '1:a',
194
- '-c:v', 'copy',
195
- '-shortest',
196
- f'"{output_video_path}"'
197
- ]
198
-
199
- try:
200
- exec_cmd(' '.join(cmd))
201
- log(f"Video with audio generated successfully: {output_video_path}")
202
- except subprocess.CalledProcessError as e:
203
- log(f"Error occurred: {e}")
 
12
 
13
  from rich.progress import track
14
  from .helper import prefix
 
15
  from .rprint import rprint as print
16
 
17
 
18
  def exec_cmd(cmd):
19
+ subprocess.run(cmd, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
20
 
21
 
22
  def images2video(images, wfp, **kwargs):
 
131
  exec_cmd(cmd)
132
 
133
 
134
+ def get_fps(filepath):
135
+ import ffmpeg
136
+ probe = ffmpeg.probe(filepath)
137
+ video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
138
+ fps = eval(video_stream['avg_frame_rate'])
 
 
 
 
 
139
  return fps
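
A note on the `get_fps` variant restored above: it parses `avg_frame_rate` (a rational string such as `30000/1001`) with `eval` and has no fallback when probing fails, unlike the deleted OpenCV-based version. Below is only a minimal sketch of an equivalent lookup that avoids `eval` and keeps a default; the `default_fps` parameter is an assumption here, not something this PR adds:

```python
# Sketch only: ffprobe-based fps lookup without eval and with a fallback,
# using the ffmpeg-python package (import ffmpeg), as in the hunk above.
from fractions import Fraction

import ffmpeg


def probe_fps(filepath: str, default_fps: float = 25.0) -> float:
    """Return the average frame rate of the first video stream, or default_fps."""
    try:
        probe = ffmpeg.probe(filepath)
        video_stream = next(
            (s for s in probe["streams"] if s["codec_type"] == "video"), None
        )
        if video_stream is None:
            return default_fps
        # avg_frame_rate is a rational string like "30000/1001" or "25/1"
        return float(Fraction(video_stream["avg_frame_rate"]))
    except Exception:
        return default_fps
```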
video2template.py ADDED
@@ -0,0 +1,37 @@
+# coding: utf-8
+
+"""
+[WIP] Pipeline for video template preparation
+"""
+
+import tyro
+from src.config.crop_config import CropConfig
+from src.config.inference_config import InferenceConfig
+from src.config.argument_config import ArgumentConfig
+from src.template_maker import TemplateMaker
+
+
+def partial_fields(target_class, kwargs):
+    return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})
+
+
+def main():
+    # set tyro theme
+    tyro.extras.set_accent_color("bright_cyan")
+    args = tyro.cli(ArgumentConfig)
+
+    # specify configs for inference
+    inference_cfg = partial_fields(InferenceConfig, args.__dict__)  # use attribute of args to initial InferenceConfig
+    crop_cfg = partial_fields(CropConfig, args.__dict__)  # use attribute of args to initial CropConfig
+
+    video_template_maker = TemplateMaker(
+        inference_cfg=inference_cfg,
+        crop_cfg=crop_cfg
+    )
+
+    # run
+    video_template_maker.make_motion_template(args.driving_video_path, args.template_output_dir)
+
+
+if __name__ == '__main__':
+    main()