diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..b88a39dcf36b90aae0763caaee5e3afe0cc4159f --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true +indent_size = 4 +indent_style = tab +trim_trailing_whitespace = true diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..1ca6ddab2cb4a43fae4eba8179b930614b997d85 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +select = E3, E4, F +per-file-ignores = facefusion/core.py:E402 diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..f6b1f326ca4ab7cf0c8798856f8fe0020ff82d58 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000000000000000000000000000000000000..718d8a695a46024d6d04f1b183f42c1d51b02a46 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: henryruhs +custom: https://paypal.me/henryruhs diff --git a/.github/preview.png b/.github/preview.png new file mode 100644 index 0000000000000000000000000000000000000000..1ed36792051e8cd65195b767726c25e6858fc0df --- /dev/null +++ b/.github/preview.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95e8f371bb61701095b97c76df17ba51f903c613f7ccc9b2195c4b0cef066c7 +size 1221656 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..8da617ba68f5543bfbd8d05b2a6c2a04d3db58ae --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,35 @@ +name: ci + +on: [ push, pull_request ] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' + - run: pip install flake8 + - run: pip install mypy + - run: flake8 run.py facefusion tests + - run: mypy run.py facefusion tests + test: + strategy: + matrix: + os: [ macos-latest, ubuntu-latest, windows-latest ] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up ffmpeg + uses: FedericoCarboni/setup-ffmpeg@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' + - run: python install.py --onnxruntime default --skip-conda + - run: pip install pytest + - run: pytest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..66381e3173c0fba9dfe95d06a57eab5a8d9a454d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.assets +.idea +.vscode diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..a93bed3fd5ec357ae9b8133d0a237eac88665da3 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,3 @@ +MIT license + +Copyright (c) 2023 Henry Ruhs diff --git a/README.md b/README.md index 33883e22c71826a98cc39cd9797cda4a25bc40a6..2d4d2504686b384dcccd2361c1d311a8ca0ac3c9 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,112 @@ ---- -title: Facefusion -emoji: 📊 -colorFrom: red -colorTo: green -sdk: gradio -sdk_version: 4.27.0 -app_file: app.py -pinned: false -license: mit ---- - -Check out the configuration reference at 
https://huggingface.co./docs/hub/spaces-config-reference +FaceFusion +========== + +> Next generation face swapper and enhancer. + +[![Build Status](https://img.shields.io/github/actions/workflow/status/facefusion/facefusion/ci.yml.svg?branch=master)](https://github.com/facefusion/facefusion/actions?query=workflow:ci) +![License](https://img.shields.io/badge/license-MIT-green) + + +Preview +------- + +![Preview](https://raw.githubusercontent.com/facefusion/facefusion/master/.github/preview.png?sanitize=true) + + +Installation +------------ + +Be aware that the installation requires technical skills and is not for beginners. Please do not open platform- and installation-related issues on GitHub. We have a very helpful [Discord](https://join.facefusion.io) community that will guide you to complete the installation. + +Get started with the [installation](https://docs.facefusion.io/installation) guide. + + +Usage +----- + +Run the command: + +``` +python run.py [options] + +options: + -h, --help show this help message and exit + -s SOURCE_PATHS, --source SOURCE_PATHS choose single or multiple source images or audio files + -t TARGET_PATH, --target TARGET_PATH choose single target image or video + -o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory + -v, --version show program's version number and exit + +misc: + --force-download force automatic downloads and exit + --skip-download omit automatic downloads and remote lookups + --headless run the program without a user interface + --log-level {error,warn,info,debug} adjust the message severity displayed in the terminal + +execution: + --execution-providers EXECUTION_PROVIDERS [EXECUTION_PROVIDERS ...] accelerate the model inference using different providers (choices: cpu, ...) + --execution-thread-count [1-128] specify the number of parallel threads while processing + --execution-queue-count [1-32] specify the number of frames each thread is processing + +memory: + --video-memory-strategy {strict,moderate,tolerant} balance fast frame processing and low VRAM usage + --system-memory-limit [0-128] limit the available RAM that can be used while processing + +face analyser: + --face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order in which the face analyser detects faces + --face-analyser-age {child,teen,adult,senior} filter the detected faces based on their age + --face-analyser-gender {female,male} filter the detected faces based on their gender + --face-detector-model {many,retinaface,scrfd,yoloface,yunet} choose the model responsible for detecting the face + --face-detector-size FACE_DETECTOR_SIZE specify the size of the frame provided to the face detector + --face-detector-score [0.0-1.0] filter the detected faces based on the confidence score + --face-landmarker-score [0.0-1.0] filter the detected landmarks based on the confidence score + +face selector: + --face-selector-mode {many,one,reference} use reference-based tracking or simple matching + --reference-face-position REFERENCE_FACE_POSITION specify the position used to create the reference face + --reference-face-distance [0.0-1.5] specify the desired similarity between the reference face and target face + --reference-frame-number REFERENCE_FRAME_NUMBER specify the frame used to create the reference face + +face mask: + --face-mask-types FACE_MASK_TYPES [FACE_MASK_TYPES ...] 
mix and match different face mask types (choices: box, occlusion, region) + --face-mask-blur [0.0-1.0] specify the degree of blur applied to the box mask + --face-mask-padding FACE_MASK_PADDING [FACE_MASK_PADDING ...] apply top, right, bottom and left padding to the box mask + --face-mask-regions FACE_MASK_REGIONS [FACE_MASK_REGIONS ...] choose the facial features used for the region mask (choices: skin, left-eyebrow, right-eyebrow, left-eye, right-eye, glasses, nose, mouth, upper-lip, lower-lip) + +frame extraction: + --trim-frame-start TRIM_FRAME_START specify the start frame of the target video + --trim-frame-end TRIM_FRAME_END specify the end frame of the target video + --temp-frame-format {bmp,jpg,png} specify the temporary resources format + --keep-temp keep the temporary resources after processing + +output creation: + --output-image-quality [0-100] specify the image quality which translates to the compression factor + --output-image-resolution OUTPUT_IMAGE_RESOLUTION specify the image output resolution based on the target image + --output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc,h264_amf,hevc_amf} specify the encoder used for the video compression + --output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} balance fast video processing and video file size + --output-video-quality [0-100] specify the video quality which translates to the compression factor + --output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the video output resolution based on the target video + --output-video-fps OUTPUT_VIDEO_FPS specify the video output fps based on the target video + --skip-audio omit the audio from the target video + +frame processors: + --frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] load a single or multiple frame processors (choices: face_debugger, face_enhancer, face_swapper, frame_colorizer, frame_enhancer, lip_syncer, ...) + --face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] load a single or multiple face debugger items (choices: bounding-box, face-landmark-5, face-landmark-5/68, face-landmark-68, face-landmark-68/5, face-mask, face-detector-score, face-landmarker-score, age, gender) + --face-enhancer-model {codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,gpen_bfr_1024,gpen_bfr_2048,restoreformer_plus_plus} choose the model responsible for enhancing the face + --face-enhancer-blend [0-100] blend the enhanced face into the previous face + --face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial,uniface_256} choose the model responsible for swapping the face + --frame-colorizer-model {ddcolor,ddcolor_artistic,deoldify,deoldify_artistic,deoldify_stable} choose the model responsible for colorizing the frame + --frame-colorizer-blend [0-100] blend the colorized frame into the previous frame + --frame-colorizer-size {192x192,256x256,384x384,512x512} specify the size of the frame provided to the frame colorizer + --frame-enhancer-model {lsdir_x4,nomos8k_sc_x4,real_esrgan_x2,real_esrgan_x2_fp16,real_esrgan_x4,real_esrgan_x4_fp16,real_hatgan_x4,span_kendata_x4} choose the model responsible for enhancing the frame + --frame-enhancer-blend [0-100] blend the enhanced frame into the previous frame + --lip-syncer-model {wav2lip_gan} choose the model responsible for syncing the lips + +uis: + --ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] launch a single or multiple UI layouts (choices: benchmark, default, webcam, ...) 
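+# Example invocation (a minimal sketch; the file names below are hypothetical): run a headless
+# face swap from one source image onto a target video using the CPU execution provider:
+# python run.py --headless -s source.jpg -t target.mp4 -o output.mp4 --frame-processors face_swapper --execution-providers cpu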
+``` + + +Documentation +------------- + +Read the [documentation](https://docs.facefusion.io) for a deep dive. diff --git a/facefusion.ini b/facefusion.ini new file mode 100644 index 0000000000000000000000000000000000000000..171542f364db5c6c5a4677acc3592a2ea636d63c --- /dev/null +++ b/facefusion.ini @@ -0,0 +1,72 @@ +[general] +source_paths = +target_path = +output_path = + +[misc] +force_download = +skip_download = +headless = +log_level = + +[execution] +execution_providers = +execution_thread_count = +execution_queue_count = + +[memory] +video_memory_strategy = +system_memory_limit = + +[face_analyser] +face_analyser_order = +face_analyser_age = +face_analyser_gender = +face_detector_model = +face_detector_size = +face_detector_score = +face_landmarker_score = + +[face_selector] +face_selector_mode = +reference_face_position = +reference_face_distance = +reference_frame_number = + +[face_mask] +face_mask_types = +face_mask_blur = +face_mask_padding = +face_mask_regions = + +[frame_extraction] +trim_frame_start = +trim_frame_end = +temp_frame_format = +keep_temp = + +[output_creation] +output_image_quality = +output_image_resolution = +output_video_encoder = +output_video_preset = +output_video_quality = +output_video_resolution = +output_video_fps = +skip_audio = + +[frame_processors] +frame_processors = +face_debugger_items = +face_enhancer_model = +face_enhancer_blend = +face_swapper_model = +frame_colorizer_model = +frame_colorizer_blend = +frame_colorizer_size = +frame_enhancer_model = +frame_enhancer_blend = +lip_syncer_model = + +[uis] +ui_layouts = diff --git a/facefusion/__init__.py b/facefusion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/facefusion/__pycache__/__init__.cpython-310.pyc b/facefusion/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86935eee8e05feb5962420078f6d6cdddf088b37 Binary files /dev/null and b/facefusion/__pycache__/__init__.cpython-310.pyc differ diff --git a/facefusion/__pycache__/__init__.cpython-311.pyc b/facefusion/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..673d453c80ab595098d691b2f90d9b4c24d02fa9 Binary files /dev/null and b/facefusion/__pycache__/__init__.cpython-311.pyc differ diff --git a/facefusion/__pycache__/audio.cpython-310.pyc b/facefusion/__pycache__/audio.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99636ccee1a88b7a94ac7539b772aa0cc4107a91 Binary files /dev/null and b/facefusion/__pycache__/audio.cpython-310.pyc differ diff --git a/facefusion/__pycache__/choices.cpython-310.pyc b/facefusion/__pycache__/choices.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d006a4588607d818239719b51553b5dffac0c351 Binary files /dev/null and b/facefusion/__pycache__/choices.cpython-310.pyc differ diff --git a/facefusion/__pycache__/common_helper.cpython-310.pyc b/facefusion/__pycache__/common_helper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58de18e00b521424d727c5cc2b8a0fc3c1d89d76 Binary files /dev/null and b/facefusion/__pycache__/common_helper.cpython-310.pyc differ diff --git a/facefusion/__pycache__/config.cpython-310.pyc b/facefusion/__pycache__/config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..121a9c6d6b57907e8cb15fb16e292c448636333c Binary files /dev/null and 
b/facefusion/__pycache__/config.cpython-310.pyc differ diff --git a/facefusion/__pycache__/content_analyser.cpython-310.pyc b/facefusion/__pycache__/content_analyser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..186ee1ea08f2443e67bcab369a22453c5de49ff6 Binary files /dev/null and b/facefusion/__pycache__/content_analyser.cpython-310.pyc differ diff --git a/facefusion/__pycache__/core.cpython-310.pyc b/facefusion/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eb79bbbbf9027e76177f6b35110c3f3e8ce45fb7 Binary files /dev/null and b/facefusion/__pycache__/core.cpython-310.pyc differ diff --git a/facefusion/__pycache__/download.cpython-310.pyc b/facefusion/__pycache__/download.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7cc1c901df06cbbf07fa326e0c12fd9ed93f8ae Binary files /dev/null and b/facefusion/__pycache__/download.cpython-310.pyc differ diff --git a/facefusion/__pycache__/execution.cpython-310.pyc b/facefusion/__pycache__/execution.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc491ff8becad58af342f72e959d8411ec1a942e Binary files /dev/null and b/facefusion/__pycache__/execution.cpython-310.pyc differ diff --git a/facefusion/__pycache__/face_analyser.cpython-310.pyc b/facefusion/__pycache__/face_analyser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eedf37391645742c0aeef0151e6b6e7308f41945 Binary files /dev/null and b/facefusion/__pycache__/face_analyser.cpython-310.pyc differ diff --git a/facefusion/__pycache__/face_helper.cpython-310.pyc b/facefusion/__pycache__/face_helper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b9dfd83a2d484e7c097cc47cc86bc07925c1fa0 Binary files /dev/null and b/facefusion/__pycache__/face_helper.cpython-310.pyc differ diff --git a/facefusion/__pycache__/face_masker.cpython-310.pyc b/facefusion/__pycache__/face_masker.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c56e129a6394c8d50873398dd850b23ac6002e8 Binary files /dev/null and b/facefusion/__pycache__/face_masker.cpython-310.pyc differ diff --git a/facefusion/__pycache__/face_store.cpython-310.pyc b/facefusion/__pycache__/face_store.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03b0ea9854c7290865696b8d61302c733db774a1 Binary files /dev/null and b/facefusion/__pycache__/face_store.cpython-310.pyc differ diff --git a/facefusion/__pycache__/ffmpeg.cpython-310.pyc b/facefusion/__pycache__/ffmpeg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e68e019de26d66967c0378165f514030c1339ae Binary files /dev/null and b/facefusion/__pycache__/ffmpeg.cpython-310.pyc differ diff --git a/facefusion/__pycache__/filesystem.cpython-310.pyc b/facefusion/__pycache__/filesystem.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff15db6de495a062eae77064451cc795a073a76c Binary files /dev/null and b/facefusion/__pycache__/filesystem.cpython-310.pyc differ diff --git a/facefusion/__pycache__/globals.cpython-310.pyc b/facefusion/__pycache__/globals.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6080f2be92196bae0980e527f516f02f148afd41 Binary files /dev/null and b/facefusion/__pycache__/globals.cpython-310.pyc differ diff --git a/facefusion/__pycache__/installer.cpython-310.pyc 
b/facefusion/__pycache__/installer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3aba78e63c7fc0126d5abee80805bd1df117c90f Binary files /dev/null and b/facefusion/__pycache__/installer.cpython-310.pyc differ diff --git a/facefusion/__pycache__/installer.cpython-311.pyc b/facefusion/__pycache__/installer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6835c8c17d7cbafbc963835bf324184c82baecb Binary files /dev/null and b/facefusion/__pycache__/installer.cpython-311.pyc differ diff --git a/facefusion/__pycache__/logger.cpython-310.pyc b/facefusion/__pycache__/logger.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ddb66d8e0d732808b03d3ce5a7dcc7be36fc5206 Binary files /dev/null and b/facefusion/__pycache__/logger.cpython-310.pyc differ diff --git a/facefusion/__pycache__/memory.cpython-310.pyc b/facefusion/__pycache__/memory.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63683a1e5d7ebed9986150f1dbab6bf7b8b93708 Binary files /dev/null and b/facefusion/__pycache__/memory.cpython-310.pyc differ diff --git a/facefusion/__pycache__/metadata.cpython-310.pyc b/facefusion/__pycache__/metadata.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc0d1ffc4b5915afde37b5aa682690e7325f3669 Binary files /dev/null and b/facefusion/__pycache__/metadata.cpython-310.pyc differ diff --git a/facefusion/__pycache__/metadata.cpython-311.pyc b/facefusion/__pycache__/metadata.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69c5ccd7a0a141543c8928a3aba550e33655406f Binary files /dev/null and b/facefusion/__pycache__/metadata.cpython-311.pyc differ diff --git a/facefusion/__pycache__/normalizer.cpython-310.pyc b/facefusion/__pycache__/normalizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..caef84a40ad7e542e20b1b88d33c53229dcc7465 Binary files /dev/null and b/facefusion/__pycache__/normalizer.cpython-310.pyc differ diff --git a/facefusion/__pycache__/process_manager.cpython-310.pyc b/facefusion/__pycache__/process_manager.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17b74d1cf9b035b3318ade5a31ace2e4883d73d4 Binary files /dev/null and b/facefusion/__pycache__/process_manager.cpython-310.pyc differ diff --git a/facefusion/__pycache__/statistics.cpython-310.pyc b/facefusion/__pycache__/statistics.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ec69d2133fdac1fb0f07a69839a72aa8cf1ff18b Binary files /dev/null and b/facefusion/__pycache__/statistics.cpython-310.pyc differ diff --git a/facefusion/__pycache__/thread_helper.cpython-310.pyc b/facefusion/__pycache__/thread_helper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92c7b15b552ac32d699499024dd4f37de3a1e673 Binary files /dev/null and b/facefusion/__pycache__/thread_helper.cpython-310.pyc differ diff --git a/facefusion/__pycache__/typing.cpython-310.pyc b/facefusion/__pycache__/typing.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f66ccc280a751959def70ee14ee01b6734e54c96 Binary files /dev/null and b/facefusion/__pycache__/typing.cpython-310.pyc differ diff --git a/facefusion/__pycache__/vision.cpython-310.pyc b/facefusion/__pycache__/vision.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..76f226d28651099d08f2c4501c97325a33895bd9 Binary 
files /dev/null and b/facefusion/__pycache__/vision.cpython-310.pyc differ diff --git a/facefusion/__pycache__/voice_extractor.cpython-310.pyc b/facefusion/__pycache__/voice_extractor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07c49030510402b506d47d9bfc0394be14a52daf Binary files /dev/null and b/facefusion/__pycache__/voice_extractor.cpython-310.pyc differ diff --git a/facefusion/__pycache__/wording.cpython-310.pyc b/facefusion/__pycache__/wording.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44a172d93395763e4c3e0e71f816ba0e636d26d3 Binary files /dev/null and b/facefusion/__pycache__/wording.cpython-310.pyc differ diff --git a/facefusion/__pycache__/wording.cpython-311.pyc b/facefusion/__pycache__/wording.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e26c43f393de38589fc68af0cb1d2580dbfe755 Binary files /dev/null and b/facefusion/__pycache__/wording.cpython-311.pyc differ diff --git a/facefusion/audio.py b/facefusion/audio.py new file mode 100644 index 0000000000000000000000000000000000000000..de800502a8da127daf16e10d9a2d7eeee06c926e --- /dev/null +++ b/facefusion/audio.py @@ -0,0 +1,137 @@ +from typing import Optional, Any, List +from functools import lru_cache +import numpy +import scipy + +from facefusion.filesystem import is_audio +from facefusion.ffmpeg import read_audio_buffer +from facefusion.typing import Fps, Audio, AudioFrame, Spectrogram, MelFilterBank +from facefusion.voice_extractor import batch_extract_voice + + +@lru_cache(maxsize = 128) +def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + return read_audio(audio_path, fps) + + +def read_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + sample_rate = 48000 + channel_total = 2 + + if is_audio(audio_path): + audio_buffer = read_audio_buffer(audio_path, sample_rate, channel_total) + audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2) + audio = prepare_audio(audio) + spectrogram = create_spectrogram(audio) + audio_frames = extract_audio_frames(spectrogram, fps) + return audio_frames + return None + + +@lru_cache(maxsize = 128) +def read_static_voice(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + return read_voice(audio_path, fps) + + +def read_voice(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + sample_rate = 48000 + channel_total = 2 + chunk_size = 1024 * 240 + step_size = 1024 * 180 + + if is_audio(audio_path): + audio_buffer = read_audio_buffer(audio_path, sample_rate, channel_total) + audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2) + audio = batch_extract_voice(audio, chunk_size, step_size) + audio = prepare_voice(audio) + spectrogram = create_spectrogram(audio) + audio_frames = extract_audio_frames(spectrogram, fps) + return audio_frames + return None + + +def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]: + if is_audio(audio_path): + audio_frames = read_static_audio(audio_path, fps) + if frame_number in range(len(audio_frames)): + return audio_frames[frame_number] + return None + + +def get_voice_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]: + if is_audio(audio_path): + voice_frames = read_static_voice(audio_path, fps) + if frame_number in range(len(voice_frames)): + return voice_frames[frame_number] + return None + + +def create_empty_audio_frame() -> AudioFrame: + mel_filter_total = 80 + 
step_size = 16 + audio_frame = numpy.zeros((mel_filter_total, step_size)).astype(numpy.int16) + return audio_frame + + +def prepare_audio(audio : numpy.ndarray[Any, Any]) -> Audio: + if audio.ndim > 1: + audio = numpy.mean(audio, axis = 1) + audio = audio / numpy.max(numpy.abs(audio), axis = 0) + audio = scipy.signal.lfilter([ 1.0, -0.97 ], [ 1.0 ], audio) + return audio + + +def prepare_voice(audio : numpy.ndarray[Any, Any]) -> Audio: + sample_rate = 48000 + resample_rate = 16000 + + audio = scipy.signal.resample(audio, int(len(audio) * resample_rate / sample_rate)) + audio = prepare_audio(audio) + return audio + + +def convert_hertz_to_mel(hertz : float) -> float: + return 2595 * numpy.log10(1 + hertz / 700) + + +def convert_mel_to_hertz(mel : numpy.ndarray[Any, Any]) -> numpy.ndarray[Any, Any]: + return 700 * (10 ** (mel / 2595) - 1) + + +def create_mel_filter_bank() -> MelFilterBank: + mel_filter_total = 80 + mel_bin_total = 800 + sample_rate = 16000 + min_frequency = 55.0 + max_frequency = 7600.0 + mel_filter_bank = numpy.zeros((mel_filter_total, mel_bin_total // 2 + 1)) + mel_frequency_range = numpy.linspace(convert_hertz_to_mel(min_frequency), convert_hertz_to_mel(max_frequency), mel_filter_total + 2) + indices = numpy.floor((mel_bin_total + 1) * convert_mel_to_hertz(mel_frequency_range) / sample_rate).astype(numpy.int16) + + for index in range(mel_filter_total): + start = indices[index] + end = indices[index + 1] + mel_filter_bank[index, start:end] = scipy.signal.windows.triang(end - start) + return mel_filter_bank + + +def create_spectrogram(audio : Audio) -> Spectrogram: + mel_bin_total = 800 + mel_bin_overlap = 600 + mel_filter_bank = create_mel_filter_bank() + spectrogram = scipy.signal.stft(audio, nperseg = mel_bin_total, nfft = mel_bin_total, noverlap = mel_bin_overlap)[2] + spectrogram = numpy.dot(mel_filter_bank, numpy.abs(spectrogram)) + return spectrogram + + +def extract_audio_frames(spectrogram : Spectrogram, fps : Fps) -> List[AudioFrame]: + mel_filter_total = 80 + step_size = 16 + audio_frames = [] + indices = numpy.arange(0, spectrogram.shape[1], mel_filter_total / fps).astype(numpy.int16) + indices = indices[indices >= step_size] + + for index in indices: + start = max(0, index - step_size) + audio_frames.append(spectrogram[:, start:index]) + return audio_frames diff --git a/facefusion/choices.py b/facefusion/choices.py new file mode 100644 index 0000000000000000000000000000000000000000..e5587b85a45ad424081b5166766a5c7c01029e1d --- /dev/null +++ b/facefusion/choices.py @@ -0,0 +1,37 @@ +from typing import List, Dict + +from facefusion.typing import VideoMemoryStrategy, FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceDetectorModel, FaceMaskType, FaceMaskRegion, TempFrameFormat, OutputVideoEncoder, OutputVideoPreset +from facefusion.common_helper import create_int_range, create_float_range + +video_memory_strategies : List[VideoMemoryStrategy] = [ 'strict', 'moderate', 'tolerant' ] +face_analyser_orders : List[FaceAnalyserOrder] = [ 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best' ] +face_analyser_ages : List[FaceAnalyserAge] = [ 'child', 'teen', 'adult', 'senior' ] +face_analyser_genders : List[FaceAnalyserGender] = [ 'female', 'male' ] +face_detector_set : Dict[FaceDetectorModel, List[str]] =\ +{ + 'many': [ '640x640' ], + 'retinaface': [ '160x160', '320x320', '480x480', '512x512', '640x640' ], + 'scrfd': [ '160x160', '320x320', '480x480', '512x512', '640x640' ], + 
'yoloface': [ '640x640' ], + 'yunet': [ '160x160', '320x320', '480x480', '512x512', '640x640', '768x768', '960x960', '1024x1024' ] +} +face_selector_modes : List[FaceSelectorMode] = [ 'many', 'one', 'reference' ] +face_mask_types : List[FaceMaskType] = [ 'box', 'occlusion', 'region' ] +face_mask_regions : List[FaceMaskRegion] = [ 'skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip' ] +temp_frame_formats : List[TempFrameFormat] = [ 'bmp', 'jpg', 'png' ] +output_video_encoders : List[OutputVideoEncoder] = [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf' ] +output_video_presets : List[OutputVideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ] + +image_template_sizes : List[float] = [ 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 3.5, 4 ] +video_template_sizes : List[int] = [ 240, 360, 480, 540, 720, 1080, 1440, 2160, 4320 ] + +execution_thread_count_range : List[int] = create_int_range(1, 128, 1) +execution_queue_count_range : List[int] = create_int_range(1, 32, 1) +system_memory_limit_range : List[int] = create_int_range(0, 128, 1) +face_detector_score_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_landmarker_score_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_mask_blur_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_mask_padding_range : List[int] = create_int_range(0, 100, 1) +reference_face_distance_range : List[float] = create_float_range(0.0, 1.5, 0.05) +output_image_quality_range : List[int] = create_int_range(0, 100, 1) +output_video_quality_range : List[int] = create_int_range(0, 100, 1) diff --git a/facefusion/common_helper.py b/facefusion/common_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..ea1ec8304d385d024605b480a2c50aa1847e75ef --- /dev/null +++ b/facefusion/common_helper.py @@ -0,0 +1,18 @@ +from typing import List, Any +import numpy + + +def create_metavar(ranges : List[Any]) -> str: + return '[' + str(ranges[0]) + '-' + str(ranges[-1]) + ']' + + +def create_int_range(start : int, stop : int, step : int) -> List[int]: + return (numpy.arange(start, stop + step, step)).tolist() + + +def create_float_range(start : float, stop : float, step : float) -> List[float]: + return (numpy.around(numpy.arange(start, stop + step, step), decimals = 2)).tolist() + + +def get_first(__list__ : Any) -> Any: + return next(iter(__list__), None) diff --git a/facefusion/config.py b/facefusion/config.py new file mode 100644 index 0000000000000000000000000000000000000000..675ef3f15c8b8f6bb487514cecd9ab09a2ab6311 --- /dev/null +++ b/facefusion/config.py @@ -0,0 +1,92 @@ +from configparser import ConfigParser +from typing import Any, Optional, List + +from facefusion.filesystem import resolve_relative_path + +CONFIG = None + + +def get_config() -> ConfigParser: + global CONFIG + + if CONFIG is None: + config_path = resolve_relative_path('../facefusion.ini') + CONFIG = ConfigParser() + CONFIG.read(config_path, encoding = 'utf-8') + return CONFIG + + +def clear_config() -> None: + global CONFIG + + CONFIG = None + + +def get_str_value(key : str, fallback : Optional[str] = None) -> Optional[str]: + value = get_value_by_notation(key) + + if value or fallback: + return str(value or fallback) + return None + + +def get_int_value(key : str, fallback : Optional[str] = None) -> Optional[int]: + value = get_value_by_notation(key) + + if value or fallback: + return int(value or 
fallback) + return None + + +def get_float_value(key : str, fallback : Optional[str] = None) -> Optional[float]: + value = get_value_by_notation(key) + + if value or fallback: + return float(value or fallback) + return None + + +def get_bool_value(key : str, fallback : Optional[str] = None) -> Optional[bool]: + value = get_value_by_notation(key) + + if value == 'True' or fallback == 'True': + return True + if value == 'False' or fallback == 'False': + return False + return None + + +def get_str_list(key : str, fallback : Optional[str] = None) -> Optional[List[str]]: + value = get_value_by_notation(key) + + if value or fallback: + return [ str(value) for value in (value or fallback).split(' ') ] + return None + + +def get_int_list(key : str, fallback : Optional[str] = None) -> Optional[List[int]]: + value = get_value_by_notation(key) + + if value or fallback: + return [ int(value) for value in (value or fallback).split(' ') ] + return None + + +def get_float_list(key : str, fallback : Optional[str] = None) -> Optional[List[float]]: + value = get_value_by_notation(key) + + if value or fallback: + return [ float(value) for value in (value or fallback).split(' ') ] + return None + + +def get_value_by_notation(key : str) -> Optional[Any]: + config = get_config() + + if '.' in key: + section, name = key.split('.') + if section in config and name in config[section]: + return config[section][name] + if key in config: + return config[key] + return None diff --git a/facefusion/content_analyser.py b/facefusion/content_analyser.py new file mode 100644 index 0000000000000000000000000000000000000000..3818f5ae816575adbd83f16da7cf3df02ee802e9 --- /dev/null +++ b/facefusion/content_analyser.py @@ -0,0 +1,112 @@ +from typing import Any +from functools import lru_cache +from time import sleep +import cv2 +import numpy +import onnxruntime +from tqdm import tqdm + +import facefusion.globals +from facefusion import process_manager, wording +from facefusion.thread_helper import thread_lock, conditional_thread_semaphore +from facefusion.typing import VisionFrame, ModelSet, Fps +from facefusion.execution import apply_execution_provider_options +from facefusion.vision import get_video_frame, count_video_frame_total, read_image, detect_video_fps +from facefusion.filesystem import resolve_relative_path, is_file +from facefusion.download import conditional_download + +CONTENT_ANALYSER = None +MODELS : ModelSet =\ +{ + 'open_nsfw': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/open_nsfw.onnx', + 'path': resolve_relative_path('../.assets/models/open_nsfw.onnx') + } +} +PROBABILITY_LIMIT = 0.80 +RATE_LIMIT = 10 +STREAM_COUNTER = 0 + + +def get_content_analyser() -> Any: + global CONTENT_ANALYSER + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if CONTENT_ANALYSER is None: + model_path = MODELS.get('open_nsfw').get('path') + CONTENT_ANALYSER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return CONTENT_ANALYSER + + +def clear_content_analyser() -> None: + global CONTENT_ANALYSER + + CONTENT_ANALYSER = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = MODELS.get('open_nsfw').get('url') + model_path = MODELS.get('open_nsfw').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return 
is_file(model_path) + + +def analyse_stream(vision_frame : VisionFrame, video_fps : Fps) -> bool: + global STREAM_COUNTER + + STREAM_COUNTER = STREAM_COUNTER + 1 + if STREAM_COUNTER % int(video_fps) == 0: + return analyse_frame(vision_frame) + return False + + +def analyse_frame(vision_frame : VisionFrame) -> bool: + content_analyser = get_content_analyser() + vision_frame = prepare_frame(vision_frame) + with conditional_thread_semaphore(facefusion.globals.execution_providers): + probability = content_analyser.run(None, + { + content_analyser.get_inputs()[0].name: vision_frame + })[0][0][1] + return probability > PROBABILITY_LIMIT + + +def prepare_frame(vision_frame : VisionFrame) -> VisionFrame: + vision_frame = cv2.resize(vision_frame, (224, 224)).astype(numpy.float32) + vision_frame -= numpy.array([ 104, 117, 123 ]).astype(numpy.float32) + vision_frame = numpy.expand_dims(vision_frame, axis = 0) + return vision_frame + + +@lru_cache(maxsize = None) +def analyse_image(image_path : str) -> bool: + frame = read_image(image_path) + return analyse_frame(frame) + + +@lru_cache(maxsize = None) +def analyse_video(video_path : str, start_frame : int, end_frame : int) -> bool: + video_frame_total = count_video_frame_total(video_path) + video_fps = detect_video_fps(video_path) + frame_range = range(start_frame or 0, end_frame or video_frame_total) + rate = 0.0 + counter = 0 + + with tqdm(total = len(frame_range), desc = wording.get('analysing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: + for frame_number in frame_range: + if frame_number % int(video_fps) == 0: + frame = get_video_frame(video_path, frame_number) + if analyse_frame(frame): + counter += 1 + rate = counter * int(video_fps) / len(frame_range) * 100 + progress.update() + progress.set_postfix(rate = rate) + return rate > RATE_LIMIT diff --git a/facefusion/core.py b/facefusion/core.py new file mode 100644 index 0000000000000000000000000000000000000000..c3a212085e934a50bc9b846a905cafac36110905 --- /dev/null +++ b/facefusion/core.py @@ -0,0 +1,416 @@ +import os + +os.environ['OMP_NUM_THREADS'] = '1' + +import signal +import sys +import warnings +import shutil +import numpy +import onnxruntime +from time import sleep, time +from argparse import ArgumentParser, HelpFormatter + +import facefusion.choices +import facefusion.globals +from facefusion.face_analyser import get_one_face, get_average_face +from facefusion.face_store import get_reference_faces, append_reference_face +from facefusion import face_analyser, face_masker, content_analyser, config, process_manager, metadata, logger, wording, voice_extractor +from facefusion.content_analyser import analyse_image, analyse_video +from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module +from facefusion.common_helper import create_metavar, get_first +from facefusion.execution import encode_execution_providers, decode_execution_providers +from facefusion.normalizer import normalize_output_path, normalize_padding, normalize_fps +from facefusion.memory import limit_system_memory +from facefusion.statistics import conditional_log_statistics +from facefusion.download import conditional_download +from facefusion.filesystem import list_directory, get_temp_frame_paths, create_temp, move_temp, clear_temp, is_image, is_video, filter_audio_paths, resolve_relative_path +from facefusion.ffmpeg import extract_frames, merge_video, copy_image, finalize_image, restore_audio, replace_audio +from 
facefusion.vision import read_image, read_static_images, detect_image_resolution, restrict_video_fps, create_image_resolutions, get_video_frame, detect_video_resolution, detect_video_fps, restrict_video_resolution, restrict_image_resolution, create_video_resolutions, pack_resolution, unpack_resolution + +onnxruntime.set_default_logger_severity(3) +warnings.filterwarnings('ignore', category = UserWarning, module = 'gradio') + + +def cli() -> None: + signal.signal(signal.SIGINT, lambda signal_number, frame: destroy()) + program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 160), add_help = False) + # general + program.add_argument('-s', '--source', help = wording.get('help.source'), action = 'append', dest = 'source_paths', default = config.get_str_list('general.source_paths')) + program.add_argument('-t', '--target', help = wording.get('help.target'), dest = 'target_path', default = config.get_str_value('general.target_path')) + program.add_argument('-o', '--output', help = wording.get('help.output'), dest = 'output_path', default = config.get_str_value('general.output_path')) + program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') + # misc + group_misc = program.add_argument_group('misc') + group_misc.add_argument('--force-download', help = wording.get('help.force_download'), action = 'store_true', default = config.get_bool_value('misc.force_download')) + group_misc.add_argument('--skip-download', help = wording.get('help.skip_download'), action = 'store_true', default = config.get_bool_value('misc.skip_download')) + group_misc.add_argument('--headless', help = wording.get('help.headless'), action = 'store_true', default = config.get_bool_value('misc.headless')) + group_misc.add_argument('--log-level', help = wording.get('help.log_level'), default = config.get_str_value('misc.log_level', 'info'), choices = logger.get_log_levels()) + # execution + execution_providers = encode_execution_providers(onnxruntime.get_available_providers()) + group_execution = program.add_argument_group('execution') + group_execution.add_argument('--execution-providers', help = wording.get('help.execution_providers').format(choices = ', '.join(execution_providers)), default = config.get_str_list('execution.execution_providers', 'cpu'), choices = execution_providers, nargs = '+', metavar = 'EXECUTION_PROVIDERS') + group_execution.add_argument('--execution-thread-count', help = wording.get('help.execution_thread_count'), type = int, default = config.get_int_value('execution.execution_thread_count', '4'), choices = facefusion.choices.execution_thread_count_range, metavar = create_metavar(facefusion.choices.execution_thread_count_range)) + group_execution.add_argument('--execution-queue-count', help = wording.get('help.execution_queue_count'), type = int, default = config.get_int_value('execution.execution_queue_count', '1'), choices = facefusion.choices.execution_queue_count_range, metavar = create_metavar(facefusion.choices.execution_queue_count_range)) + # memory + group_memory = program.add_argument_group('memory') + group_memory.add_argument('--video-memory-strategy', help = wording.get('help.video_memory_strategy'), default = config.get_str_value('memory.video_memory_strategy', 'strict'), choices = facefusion.choices.video_memory_strategies) + group_memory.add_argument('--system-memory-limit', help = wording.get('help.system_memory_limit'), type = int, default = 
config.get_int_value('memory.system_memory_limit', '0'), choices = facefusion.choices.system_memory_limit_range, metavar = create_metavar(facefusion.choices.system_memory_limit_range)) + # face analyser + group_face_analyser = program.add_argument_group('face analyser') + group_face_analyser.add_argument('--face-analyser-order', help = wording.get('help.face_analyser_order'), default = config.get_str_value('face_analyser.face_analyser_order', 'left-right'), choices = facefusion.choices.face_analyser_orders) + group_face_analyser.add_argument('--face-analyser-age', help = wording.get('help.face_analyser_age'), default = config.get_str_value('face_analyser.face_analyser_age'), choices = facefusion.choices.face_analyser_ages) + group_face_analyser.add_argument('--face-analyser-gender', help = wording.get('help.face_analyser_gender'), default = config.get_str_value('face_analyser.face_analyser_gender'), choices = facefusion.choices.face_analyser_genders) + group_face_analyser.add_argument('--face-detector-model', help = wording.get('help.face_detector_model'), default = config.get_str_value('face_analyser.face_detector_model', 'yoloface'), choices = facefusion.choices.face_detector_set.keys()) + group_face_analyser.add_argument('--face-detector-size', help = wording.get('help.face_detector_size'), default = config.get_str_value('face_analyser.face_detector_size', '640x640')) + group_face_analyser.add_argument('--face-detector-score', help = wording.get('help.face_detector_score'), type = float, default = config.get_float_value('face_analyser.face_detector_score', '0.5'), choices = facefusion.choices.face_detector_score_range, metavar = create_metavar(facefusion.choices.face_detector_score_range)) + group_face_analyser.add_argument('--face-landmarker-score', help = wording.get('help.face_landmarker_score'), type = float, default = config.get_float_value('face_analyser.face_landmarker_score', '0.5'), choices = facefusion.choices.face_landmarker_score_range, metavar = create_metavar(facefusion.choices.face_landmarker_score_range)) + # face selector + group_face_selector = program.add_argument_group('face selector') + group_face_selector.add_argument('--face-selector-mode', help = wording.get('help.face_selector_mode'), default = config.get_str_value('face_selector.face_selector_mode', 'reference'), choices = facefusion.choices.face_selector_modes) + group_face_selector.add_argument('--reference-face-position', help = wording.get('help.reference_face_position'), type = int, default = config.get_int_value('face_selector.reference_face_position', '0')) + group_face_selector.add_argument('--reference-face-distance', help = wording.get('help.reference_face_distance'), type = float, default = config.get_float_value('face_selector.reference_face_distance', '0.6'), choices = facefusion.choices.reference_face_distance_range, metavar = create_metavar(facefusion.choices.reference_face_distance_range)) + group_face_selector.add_argument('--reference-frame-number', help = wording.get('help.reference_frame_number'), type = int, default = config.get_int_value('face_selector.reference_frame_number', '0')) + # face mask + group_face_mask = program.add_argument_group('face mask') + group_face_mask.add_argument('--face-mask-types', help = wording.get('help.face_mask_types').format(choices = ', '.join(facefusion.choices.face_mask_types)), default = config.get_str_list('face_mask.face_mask_types', 'box'), choices = facefusion.choices.face_mask_types, nargs = '+', metavar = 'FACE_MASK_TYPES') + 
group_face_mask.add_argument('--face-mask-blur', help = wording.get('help.face_mask_blur'), type = float, default = config.get_float_value('face_mask.face_mask_blur', '0.3'), choices = facefusion.choices.face_mask_blur_range, metavar = create_metavar(facefusion.choices.face_mask_blur_range)) + group_face_mask.add_argument('--face-mask-padding', help = wording.get('help.face_mask_padding'), type = int, default = config.get_int_list('face_mask.face_mask_padding', '0 0 0 0'), nargs = '+') + group_face_mask.add_argument('--face-mask-regions', help = wording.get('help.face_mask_regions').format(choices = ', '.join(facefusion.choices.face_mask_regions)), default = config.get_str_list('face_mask.face_mask_regions', ' '.join(facefusion.choices.face_mask_regions)), choices = facefusion.choices.face_mask_regions, nargs = '+', metavar = 'FACE_MASK_REGIONS') + # frame extraction + group_frame_extraction = program.add_argument_group('frame extraction') + group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('help.trim_frame_start'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_start')) + group_frame_extraction.add_argument('--trim-frame-end', help = wording.get('help.trim_frame_end'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_end')) + group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('help.temp_frame_format'), default = config.get_str_value('frame_extraction.temp_frame_format', 'png'), choices = facefusion.choices.temp_frame_formats) + group_frame_extraction.add_argument('--keep-temp', help = wording.get('help.keep_temp'), action = 'store_true', default = config.get_bool_value('frame_extraction.keep_temp')) + # output creation + group_output_creation = program.add_argument_group('output creation') + group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_metavar(facefusion.choices.output_image_quality_range)) + group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation.output_image_resolution')) + group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders) + group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) + group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_metavar(facefusion.choices.output_video_quality_range)) + group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution')) + group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float, default = 
config.get_str_value('output_creation.output_video_fps')) + group_output_creation.add_argument('--skip-audio', help = wording.get('help.skip_audio'), action = 'store_true', default = config.get_bool_value('output_creation.skip_audio')) + # frame processors + available_frame_processors = list_directory('facefusion/processors/frame/modules') + program = ArgumentParser(parents = [ program ], formatter_class = program.formatter_class, add_help = True) + group_frame_processors = program.add_argument_group('frame processors') + group_frame_processors.add_argument('--frame-processors', help = wording.get('help.frame_processors').format(choices = ', '.join(available_frame_processors)), default = config.get_str_list('frame_processors.frame_processors', 'face_swapper'), nargs = '+') + for frame_processor in available_frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + frame_processor_module.register_args(group_frame_processors) + # uis + available_ui_layouts = list_directory('facefusion/uis/layouts') + group_uis = program.add_argument_group('uis') + group_uis.add_argument('--ui-layouts', help = wording.get('help.ui_layouts').format(choices = ', '.join(available_ui_layouts)), default = config.get_str_list('uis.ui_layouts', 'default'), nargs = '+') + run(program) + + +def validate_args(program : ArgumentParser) -> None: + try: + for action in program._actions: + if action.default: + if isinstance(action.default, list): + for default in action.default: + program._check_value(action, default) + else: + program._check_value(action, action.default) + except Exception as exception: + program.error(str(exception)) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + # general + facefusion.globals.source_paths = args.source_paths + facefusion.globals.target_path = args.target_path + facefusion.globals.output_path = args.output_path + # misc + facefusion.globals.force_download = args.force_download + facefusion.globals.skip_download = args.skip_download + facefusion.globals.headless = args.headless + facefusion.globals.log_level = args.log_level + # execution + facefusion.globals.execution_providers = decode_execution_providers(args.execution_providers) + facefusion.globals.execution_thread_count = args.execution_thread_count + facefusion.globals.execution_queue_count = args.execution_queue_count + # memory + facefusion.globals.video_memory_strategy = args.video_memory_strategy + facefusion.globals.system_memory_limit = args.system_memory_limit + # face analyser + facefusion.globals.face_analyser_order = args.face_analyser_order + facefusion.globals.face_analyser_age = args.face_analyser_age + facefusion.globals.face_analyser_gender = args.face_analyser_gender + facefusion.globals.face_detector_model = args.face_detector_model + if args.face_detector_size in facefusion.choices.face_detector_set[args.face_detector_model]: + facefusion.globals.face_detector_size = args.face_detector_size + else: + facefusion.globals.face_detector_size = '640x640' + facefusion.globals.face_detector_score = args.face_detector_score + facefusion.globals.face_landmarker_score = args.face_landmarker_score + # face selector + facefusion.globals.face_selector_mode = args.face_selector_mode + facefusion.globals.reference_face_position = args.reference_face_position + facefusion.globals.reference_face_distance = args.reference_face_distance + facefusion.globals.reference_frame_number = args.reference_frame_number + # face mask + facefusion.globals.face_mask_types 
= args.face_mask_types + facefusion.globals.face_mask_blur = args.face_mask_blur + facefusion.globals.face_mask_padding = normalize_padding(args.face_mask_padding) + facefusion.globals.face_mask_regions = args.face_mask_regions + # frame extraction + facefusion.globals.trim_frame_start = args.trim_frame_start + facefusion.globals.trim_frame_end = args.trim_frame_end + facefusion.globals.temp_frame_format = args.temp_frame_format + facefusion.globals.keep_temp = args.keep_temp + # output creation + facefusion.globals.output_image_quality = args.output_image_quality + if is_image(args.target_path): + output_image_resolution = detect_image_resolution(args.target_path) + output_image_resolutions = create_image_resolutions(output_image_resolution) + if args.output_image_resolution in output_image_resolutions: + facefusion.globals.output_image_resolution = args.output_image_resolution + else: + facefusion.globals.output_image_resolution = pack_resolution(output_image_resolution) + facefusion.globals.output_video_encoder = args.output_video_encoder + facefusion.globals.output_video_preset = args.output_video_preset + facefusion.globals.output_video_quality = args.output_video_quality + if is_video(args.target_path): + output_video_resolution = detect_video_resolution(args.target_path) + output_video_resolutions = create_video_resolutions(output_video_resolution) + if args.output_video_resolution in output_video_resolutions: + facefusion.globals.output_video_resolution = args.output_video_resolution + else: + facefusion.globals.output_video_resolution = pack_resolution(output_video_resolution) + if args.output_video_fps or is_video(args.target_path): + facefusion.globals.output_video_fps = normalize_fps(args.output_video_fps) or detect_video_fps(args.target_path) + facefusion.globals.skip_audio = args.skip_audio + # frame processors + available_frame_processors = list_directory('facefusion/processors/frame/modules') + facefusion.globals.frame_processors = args.frame_processors + for frame_processor in available_frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + frame_processor_module.apply_args(program) + # uis + facefusion.globals.ui_layouts = args.ui_layouts + + +def run(program : ArgumentParser) -> None: + validate_args(program) + apply_args(program) + logger.init(facefusion.globals.log_level) + + if facefusion.globals.system_memory_limit > 0: + limit_system_memory(facefusion.globals.system_memory_limit) + if facefusion.globals.force_download: + force_download() + return + if not pre_check() or not content_analyser.pre_check() or not face_analyser.pre_check() or not face_masker.pre_check() or not voice_extractor.pre_check(): + return + for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + if not frame_processor_module.pre_check(): + return + if facefusion.globals.headless: + conditional_process() + else: + import facefusion.uis.core as ui + + for ui_layout in ui.get_ui_layouts_modules(facefusion.globals.ui_layouts): + if not ui_layout.pre_check(): + return + ui.launch() + + +def destroy() -> None: + # process_manager.stop() + # while process_manager.is_processing(): + # sleep(0.5) + # if facefusion.globals.target_path: + # clear_temp(facefusion.globals.target_path) + sys.exit(0) + + +def pre_check() -> bool: + if sys.version_info < (3, 9): + logger.error(wording.get('python_not_supported').format(version = '3.9'), __name__.upper()) + return False + if not shutil.which('ffmpeg'): + 
logger.error(wording.get('ffmpeg_not_installed'), __name__.upper()) + return False + return True + + +def conditional_process() -> None: + start_time = time() + for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() + if not frame_processor_module.pre_process('output'): + return + conditional_append_reference_faces() + if is_image(facefusion.globals.target_path): + process_image(start_time) + if is_video(facefusion.globals.target_path): + process_video(start_time) + + +def conditional_append_reference_faces() -> None: + if 'reference' in facefusion.globals.face_selector_mode and not get_reference_faces(): + source_frames = read_static_images(facefusion.globals.source_paths) + source_face = get_average_face(source_frames) + if is_video(facefusion.globals.target_path): + reference_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number) + else: + reference_frame = read_image(facefusion.globals.target_path) + reference_face = get_one_face(reference_frame, facefusion.globals.reference_face_position) + append_reference_face('origin', reference_face) + if source_face and reference_face: + for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + abstract_reference_frame = frame_processor_module.get_reference_frame(source_face, reference_face, reference_frame) + if numpy.any(abstract_reference_frame): + reference_frame = abstract_reference_frame + reference_face = get_one_face(reference_frame, facefusion.globals.reference_face_position) + append_reference_face(frame_processor_module.__name__, reference_face) + + +def force_download() -> None: + download_directory_path = resolve_relative_path('../.assets/models') + available_frame_processors = list_directory('facefusion/processors/frame/modules') + model_list =\ + [ + content_analyser.MODELS, + face_analyser.MODELS, + face_masker.MODELS, + voice_extractor.MODELS + ] + + for frame_processor_module in get_frame_processors_modules(available_frame_processors): + if hasattr(frame_processor_module, 'MODELS'): + model_list.append(frame_processor_module.MODELS) + model_urls = [ models[model].get('url') for models in model_list for model in models ] + conditional_download(download_directory_path, model_urls) + + +def process_image(start_time : float) -> None: + normed_output_path = normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path) + # if analyse_image(facefusion.globals.target_path): + # return + # copy image + process_manager.start() + temp_image_resolution = pack_resolution(restrict_image_resolution(facefusion.globals.target_path, unpack_resolution(facefusion.globals.output_image_resolution))) + logger.info(wording.get('copying_image').format(resolution = temp_image_resolution), __name__.upper()) + if copy_image(facefusion.globals.target_path, normed_output_path, temp_image_resolution): + logger.debug(wording.get('copying_image_succeed'), __name__.upper()) + else: + logger.error(wording.get('copying_image_failed'), __name__.upper()) + return + # process image + for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + logger.info(wording.get('processing'), frame_processor_module.NAME) + frame_processor_module.process_image(facefusion.globals.source_paths, normed_output_path, normed_output_path) + frame_processor_module.post_process() + if 
is_process_stopping(): + return + # finalize image + logger.info(wording.get('finalizing_image').format(resolution = facefusion.globals.output_image_resolution), __name__.upper()) + if finalize_image(normed_output_path, facefusion.globals.output_image_resolution): + logger.debug(wording.get('finalizing_image_succeed'), __name__.upper()) + else: + logger.warn(wording.get('finalizing_image_skipped'), __name__.upper()) + # validate image + if is_image(normed_output_path): + seconds = '{:.2f}'.format((time() - start_time) % 60) + logger.info(wording.get('processing_image_succeed').format(seconds = seconds), __name__.upper()) + conditional_log_statistics() + else: + logger.error(wording.get('processing_image_failed'), __name__.upper()) + process_manager.end() + + +def process_video(start_time : float) -> None: + normed_output_path = normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path) + # if analyse_video(facefusion.globals.target_path, facefusion.globals.trim_frame_start, facefusion.globals.trim_frame_end): + # return + # clear temp + logger.debug(wording.get('clearing_temp'), __name__.upper()) + clear_temp(facefusion.globals.target_path) + # create temp + logger.debug(wording.get('creating_temp'), __name__.upper()) + create_temp(facefusion.globals.target_path) + # extract frames + process_manager.start() + temp_video_resolution = pack_resolution(restrict_video_resolution(facefusion.globals.target_path, unpack_resolution(facefusion.globals.output_video_resolution))) + temp_video_fps = restrict_video_fps(facefusion.globals.target_path, facefusion.globals.output_video_fps) + logger.info(wording.get('extracting_frames').format(resolution = temp_video_resolution, fps = temp_video_fps), __name__.upper()) + if extract_frames(facefusion.globals.target_path, temp_video_resolution, temp_video_fps): + logger.debug(wording.get('extracting_frames_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.error(wording.get('extracting_frames_failed'), __name__.upper()) + return + # process frames + temp_frame_paths = get_temp_frame_paths(facefusion.globals.target_path) + if temp_frame_paths: + for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + logger.info(wording.get('processing'), frame_processor_module.NAME) + frame_processor_module.process_video(facefusion.globals.source_paths, temp_frame_paths) + frame_processor_module.post_process() + if is_process_stopping(): + return + else: + logger.error(wording.get('temp_frames_not_found'), __name__.upper()) + return + # merge video + logger.info(wording.get('merging_video').format(resolution = facefusion.globals.output_video_resolution, fps = facefusion.globals.output_video_fps), __name__.upper()) + if merge_video(facefusion.globals.target_path, facefusion.globals.output_video_resolution, facefusion.globals.output_video_fps): + logger.debug(wording.get('merging_video_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.error(wording.get('merging_video_failed'), __name__.upper()) + return + # handle audio + if facefusion.globals.skip_audio: + logger.info(wording.get('skipping_audio'), __name__.upper()) + move_temp(facefusion.globals.target_path, normed_output_path) + else: + if 'lip_syncer' in facefusion.globals.frame_processors: + source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) + if source_audio_path and replace_audio(facefusion.globals.target_path, source_audio_path, normed_output_path): + 
logger.debug(wording.get('restoring_audio_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) + move_temp(facefusion.globals.target_path, normed_output_path) + else: + if restore_audio(facefusion.globals.target_path, normed_output_path, facefusion.globals.output_video_fps): + logger.debug(wording.get('restoring_audio_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) + move_temp(facefusion.globals.target_path, normed_output_path) + # clear temp + logger.debug(wording.get('clearing_temp'), __name__.upper()) + clear_temp(facefusion.globals.target_path) + # validate video + if is_video(normed_output_path): + seconds = '{:.2f}'.format((time() - start_time)) + logger.info(wording.get('processing_video_succeed').format(seconds = seconds), __name__.upper()) + conditional_log_statistics() + else: + logger.error(wording.get('processing_video_failed'), __name__.upper()) + process_manager.end() + + +def is_process_stopping() -> bool: + if process_manager.is_stopping(): + process_manager.end() + logger.info(wording.get('processing_stopped'), __name__.upper()) + return process_manager.is_pending() diff --git a/facefusion/download.py b/facefusion/download.py new file mode 100644 index 0000000000000000000000000000000000000000..d24b43406934a96a291b2ec57949fad0e267ef41 --- /dev/null +++ b/facefusion/download.py @@ -0,0 +1,48 @@ +import os +import subprocess +import platform +import ssl +import urllib.request +from typing import List +from functools import lru_cache +from tqdm import tqdm + +import facefusion.globals +from facefusion import wording +from facefusion.filesystem import is_file + +if platform.system().lower() == 'darwin': + ssl._create_default_https_context = ssl._create_unverified_context + + +def conditional_download(download_directory_path : str, urls : List[str]) -> None: + for url in urls: + download_file_path = os.path.join(download_directory_path, os.path.basename(url)) + initial_size = os.path.getsize(download_file_path) if is_file(download_file_path) else 0 + download_size = get_download_size(url) + if initial_size < download_size: + with tqdm(total = download_size, initial = initial_size, desc = wording.get('downloading'), unit = 'B', unit_scale = True, unit_divisor = 1024, ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: + subprocess.Popen([ 'curl', '--create-dirs', '--silent', '--insecure', '--location', '--continue-at', '-', '--output', download_file_path, url ]) + current_size = initial_size + while current_size < download_size: + if is_file(download_file_path): + current_size = os.path.getsize(download_file_path) + progress.update(current_size - progress.n) + if download_size and not is_download_done(url, download_file_path): + os.remove(download_file_path) + conditional_download(download_directory_path, [ url ]) + + +@lru_cache(maxsize = None) +def get_download_size(url : str) -> int: + try: + response = urllib.request.urlopen(url, timeout = 10) + return int(response.getheader('Content-Length')) + except (OSError, ValueError): + return 0 + + +def is_download_done(url : str, file_path : str) -> bool: + if is_file(file_path): + return get_download_size(url) == os.path.getsize(file_path) + return False diff --git a/facefusion/execution.py b/facefusion/execution.py new file mode 100644 index 
0000000000000000000000000000000000000000..cf4733bd74c1476e4adf22973cc7f0b57fc0b5a4 --- /dev/null +++ b/facefusion/execution.py @@ -0,0 +1,96 @@ +from typing import List, Any +from functools import lru_cache +import subprocess +import xml.etree.ElementTree as ElementTree +import onnxruntime + +from facefusion.typing import ExecutionDevice, ValueAndUnit + + +def encode_execution_providers(execution_providers : List[str]) -> List[str]: + return [ execution_provider.replace('ExecutionProvider', '').lower() for execution_provider in execution_providers ] + + +def decode_execution_providers(execution_providers : List[str]) -> List[str]: + available_execution_providers = onnxruntime.get_available_providers() + encoded_execution_providers = encode_execution_providers(available_execution_providers) + + return [ execution_provider for execution_provider, encoded_execution_provider in zip(available_execution_providers, encoded_execution_providers) if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers) ] + + +def apply_execution_provider_options(execution_providers : List[str]) -> List[Any]: + execution_providers_with_options : List[Any] = [] + + for execution_provider in execution_providers: + if execution_provider == 'CUDAExecutionProvider': + execution_providers_with_options.append((execution_provider, + { + 'cudnn_conv_algo_search': 'EXHAUSTIVE' if use_exhaustive() else 'DEFAULT' + })) + else: + execution_providers_with_options.append(execution_provider) + return execution_providers_with_options + + +def use_exhaustive() -> bool: + execution_devices = detect_static_execution_devices() + product_names = ('GeForce GTX 1630', 'GeForce GTX 1650', 'GeForce GTX 1660') + + return any(execution_device.get('product').get('name').startswith(product_names) for execution_device in execution_devices) + + +def run_nvidia_smi() -> subprocess.Popen[bytes]: + commands = [ 'nvidia-smi', '--query', '--xml-format' ] + return subprocess.Popen(commands, stdout = subprocess.PIPE) + + +@lru_cache(maxsize = None) +def detect_static_execution_devices() -> List[ExecutionDevice]: + return detect_execution_devices() + + +def detect_execution_devices() -> List[ExecutionDevice]: + execution_devices : List[ExecutionDevice] = [] + try: + output, _ = run_nvidia_smi().communicate() + root_element = ElementTree.fromstring(output) + except Exception: + root_element = ElementTree.Element('xml') + + for gpu_element in root_element.findall('gpu'): + execution_devices.append( + { + 'driver_version': root_element.find('driver_version').text, + 'framework': + { + 'name': 'CUDA', + 'version': root_element.find('cuda_version').text + }, + 'product': + { + 'vendor': 'NVIDIA', + 'name': gpu_element.find('product_name').text.replace('NVIDIA ', '') + }, + 'video_memory': + { + 'total': create_value_and_unit(gpu_element.find('fb_memory_usage/total').text), + 'free': create_value_and_unit(gpu_element.find('fb_memory_usage/free').text) + }, + 'utilization': + { + 'gpu': create_value_and_unit(gpu_element.find('utilization/gpu_util').text), + 'memory': create_value_and_unit(gpu_element.find('utilization/memory_util').text) + } + }) + return execution_devices + + +def create_value_and_unit(text : str) -> ValueAndUnit: + value, unit = text.split() + value_and_unit : ValueAndUnit =\ + { + 'value': value, + 'unit': unit + } + + return value_and_unit diff --git a/facefusion/face_analyser.py b/facefusion/face_analyser.py new file mode 100644 index 
0000000000000000000000000000000000000000..81f501bc3ebcfcdb72cda44d07a3da4b267270ae --- /dev/null +++ b/facefusion/face_analyser.py @@ -0,0 +1,586 @@ +from typing import Any, Optional, List, Tuple +from time import sleep +import cv2 +import numpy +import onnxruntime + +import facefusion.globals +from facefusion import process_manager +from facefusion.common_helper import get_first +from facefusion.face_helper import estimate_matrix_by_face_landmark_5, warp_face_by_face_landmark_5, warp_face_by_translation, create_static_anchors, distance_to_face_landmark_5, distance_to_bounding_box, convert_face_landmark_68_to_5, apply_nms, categorize_age, categorize_gender +from facefusion.face_store import get_static_faces, set_static_faces +from facefusion.execution import apply_execution_provider_options +from facefusion.download import conditional_download +from facefusion.filesystem import resolve_relative_path, is_file +from facefusion.thread_helper import thread_lock, thread_semaphore, conditional_thread_semaphore +from facefusion.typing import VisionFrame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, BoundingBox, FaceLandmarkSet, FaceLandmark5, FaceLandmark68, Score, FaceScoreSet, Embedding +from facefusion.vision import resize_frame_resolution, unpack_resolution + +FACE_ANALYSER = None +MODELS : ModelSet =\ +{ + 'face_detector_retinaface': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/retinaface_10g.onnx', + 'path': resolve_relative_path('../.assets/models/retinaface_10g.onnx') + }, + 'face_detector_scrfd': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/scrfd_2.5g.onnx', + 'path': resolve_relative_path('../.assets/models/scrfd_2.5g.onnx') + }, + 'face_detector_yoloface': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yoloface_8n.onnx', + 'path': resolve_relative_path('../.assets/models/yoloface_8n.onnx') + }, + 'face_detector_yunet': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yunet_2023mar.onnx', + 'path': resolve_relative_path('../.assets/models/yunet_2023mar.onnx') + }, + 'face_recognizer_arcface_blendswap': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx', + 'path': resolve_relative_path('../.assets/models/arcface_w600k_r50.onnx') + }, + 'face_recognizer_arcface_inswapper': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx', + 'path': resolve_relative_path('../.assets/models/arcface_w600k_r50.onnx') + }, + 'face_recognizer_arcface_simswap': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_simswap.onnx', + 'path': resolve_relative_path('../.assets/models/arcface_simswap.onnx') + }, + 'face_recognizer_arcface_uniface': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx', + 'path': resolve_relative_path('../.assets/models/arcface_w600k_r50.onnx') + }, + 'face_landmarker_68': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/2dfan4.onnx', + 'path': resolve_relative_path('../.assets/models/2dfan4.onnx') + }, + 'face_landmarker_68_5': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/face_landmarker_68_5.onnx', + 'path': resolve_relative_path('../.assets/models/face_landmarker_68_5.onnx') + }, + 
'gender_age': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gender_age.onnx', + 'path': resolve_relative_path('../.assets/models/gender_age.onnx') + } +} + + +def get_face_analyser() -> Any: + global FACE_ANALYSER + + face_detectors = {} + face_landmarkers = {} + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FACE_ANALYSER is None: + if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]: + face_detectors['retinaface'] = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]: + face_detectors['scrfd'] = onnxruntime.InferenceSession(MODELS.get('face_detector_scrfd').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]: + face_detectors['yoloface'] = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + if facefusion.globals.face_detector_model in [ 'yunet' ]: + face_detectors['yunet'] = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0)) + if facefusion.globals.face_recognizer_model == 'arcface_blendswap': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_blendswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + if facefusion.globals.face_recognizer_model == 'arcface_inswapper': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_inswapper').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + if facefusion.globals.face_recognizer_model == 'arcface_simswap': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_simswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + if facefusion.globals.face_recognizer_model == 'arcface_uniface': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_uniface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + face_landmarkers['68'] = onnxruntime.InferenceSession(MODELS.get('face_landmarker_68').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + face_landmarkers['68_5'] = onnxruntime.InferenceSession(MODELS.get('face_landmarker_68_5').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + gender_age = onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + FACE_ANALYSER =\ + { + 'face_detectors': face_detectors, + 'face_recognizer': face_recognizer, + 'face_landmarkers': face_landmarkers, + 'gender_age': gender_age + } + return FACE_ANALYSER + + +def clear_face_analyser() -> Any: + global FACE_ANALYSER + + FACE_ANALYSER = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_urls =\ + [ + MODELS.get('face_landmarker_68').get('url'), + MODELS.get('face_landmarker_68_5').get('url'), + MODELS.get('gender_age').get('url') + ] + model_paths =\ + [ + 
MODELS.get('face_landmarker_68').get('path'), + MODELS.get('face_landmarker_68_5').get('path'), + MODELS.get('gender_age').get('path') + ] + + if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]: + model_urls.append(MODELS.get('face_detector_retinaface').get('url')) + model_paths.append(MODELS.get('face_detector_retinaface').get('path')) + if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]: + model_urls.append(MODELS.get('face_detector_scrfd').get('url')) + model_paths.append(MODELS.get('face_detector_scrfd').get('path')) + if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]: + model_urls.append(MODELS.get('face_detector_yoloface').get('url')) + model_paths.append(MODELS.get('face_detector_yoloface').get('path')) + if facefusion.globals.face_detector_model in [ 'yunet' ]: + model_urls.append(MODELS.get('face_detector_yunet').get('url')) + model_paths.append(MODELS.get('face_detector_yunet').get('path')) + if facefusion.globals.face_recognizer_model == 'arcface_blendswap': + model_urls.append(MODELS.get('face_recognizer_arcface_blendswap').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_blendswap').get('path')) + if facefusion.globals.face_recognizer_model == 'arcface_inswapper': + model_urls.append(MODELS.get('face_recognizer_arcface_inswapper').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_inswapper').get('path')) + if facefusion.globals.face_recognizer_model == 'arcface_simswap': + model_urls.append(MODELS.get('face_recognizer_arcface_simswap').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_simswap').get('path')) + if facefusion.globals.face_recognizer_model == 'arcface_uniface': + model_urls.append(MODELS.get('face_recognizer_arcface_uniface').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_uniface').get('path')) + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, model_urls) + process_manager.end() + return all(is_file(model_path) for model_path in model_paths) + + +def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('retinaface') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + feature_strides = [ 8, 16, 32 ] + feature_map_channel = 3 + anchor_total = 2 + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) + with thread_semaphore(): + detections = face_detector.run(None, + { + face_detector.get_inputs()[0].name: detect_vision_frame + }) + for index, feature_stride in enumerate(feature_strides): + keep_indices = numpy.where(detections[index] >= facefusion.globals.face_detector_score)[0] + if keep_indices.any(): + stride_height = face_detector_height // feature_stride + stride_width = face_detector_width // feature_stride + anchors = create_static_anchors(feature_stride, anchor_total, stride_height, stride_width) + bounding_box_raw = detections[index + feature_map_channel] * feature_stride + face_landmark_5_raw = detections[index + feature_map_channel * 2] * 
feature_stride + for bounding_box in distance_to_bounding_box(anchors, bounding_box_raw)[keep_indices]: + bounding_box_list.append(numpy.array( + [ + bounding_box[0] * ratio_width, + bounding_box[1] * ratio_height, + bounding_box[2] * ratio_width, + bounding_box[3] * ratio_height + ])) + for face_landmark_5 in distance_to_face_landmark_5(anchors, face_landmark_5_raw)[keep_indices]: + face_landmark_5_list.append(face_landmark_5 * [ ratio_width, ratio_height ]) + for score in detections[index][keep_indices]: + score_list.append(score[0]) + return bounding_box_list, face_landmark_5_list, score_list + + +def detect_with_scrfd(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('scrfd') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + feature_strides = [ 8, 16, 32 ] + feature_map_channel = 3 + anchor_total = 2 + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) + with thread_semaphore(): + detections = face_detector.run(None, + { + face_detector.get_inputs()[0].name: detect_vision_frame + }) + for index, feature_stride in enumerate(feature_strides): + keep_indices = numpy.where(detections[index] >= facefusion.globals.face_detector_score)[0] + if keep_indices.any(): + stride_height = face_detector_height // feature_stride + stride_width = face_detector_width // feature_stride + anchors = create_static_anchors(feature_stride, anchor_total, stride_height, stride_width) + bounding_box_raw = detections[index + feature_map_channel] * feature_stride + face_landmark_5_raw = detections[index + feature_map_channel * 2] * feature_stride + for bounding_box in distance_to_bounding_box(anchors, bounding_box_raw)[keep_indices]: + bounding_box_list.append(numpy.array( + [ + bounding_box[0] * ratio_width, + bounding_box[1] * ratio_height, + bounding_box[2] * ratio_width, + bounding_box[3] * ratio_height + ])) + for face_landmark_5 in distance_to_face_landmark_5(anchors, face_landmark_5_raw)[keep_indices]: + face_landmark_5_list.append(face_landmark_5 * [ ratio_width, ratio_height ]) + for score in detections[index][keep_indices]: + score_list.append(score[0]) + return bounding_box_list, face_landmark_5_list, score_list + + +def detect_with_yoloface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('yoloface') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) + with thread_semaphore(): + detections = face_detector.run(None, + { + face_detector.get_inputs()[0].name: detect_vision_frame + }) + detections = numpy.squeeze(detections).T + bounding_box_raw, 
score_raw, face_landmark_5_raw = numpy.split(detections, [ 4, 5 ], axis = 1) + keep_indices = numpy.where(score_raw > facefusion.globals.face_detector_score)[0] + if keep_indices.any(): + bounding_box_raw, face_landmark_5_raw, score_raw = bounding_box_raw[keep_indices], face_landmark_5_raw[keep_indices], score_raw[keep_indices] + for bounding_box in bounding_box_raw: + bounding_box_list.append(numpy.array( + [ + (bounding_box[0] - bounding_box[2] / 2) * ratio_width, + (bounding_box[1] - bounding_box[3] / 2) * ratio_height, + (bounding_box[0] + bounding_box[2] / 2) * ratio_width, + (bounding_box[1] + bounding_box[3] / 2) * ratio_height + ])) + face_landmark_5_raw[:, 0::3] = (face_landmark_5_raw[:, 0::3]) * ratio_width + face_landmark_5_raw[:, 1::3] = (face_landmark_5_raw[:, 1::3]) * ratio_height + for face_landmark_5 in face_landmark_5_raw: + face_landmark_5_list.append(numpy.array(face_landmark_5.reshape(-1, 3)[:, :2])) + score_list = score_raw.ravel().tolist() + return bounding_box_list, face_landmark_5_list, score_list + + +def detect_with_yunet(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('yunet') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + face_detector.setInputSize((temp_vision_frame.shape[1], temp_vision_frame.shape[0])) + face_detector.setScoreThreshold(facefusion.globals.face_detector_score) + with thread_semaphore(): + _, detections = face_detector.detect(temp_vision_frame) + if numpy.any(detections): + for detection in detections: + bounding_box_list.append(numpy.array( + [ + detection[0] * ratio_width, + detection[1] * ratio_height, + (detection[0] + detection[2]) * ratio_width, + (detection[1] + detection[3]) * ratio_height + ])) + face_landmark_5_list.append(detection[4:14].reshape((5, 2)) * [ ratio_width, ratio_height ]) + score_list.append(detection[14]) + return bounding_box_list, face_landmark_5_list, score_list + + +def prepare_detect_frame(temp_vision_frame : VisionFrame, face_detector_size : str) -> VisionFrame: + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + detect_vision_frame = numpy.zeros((face_detector_height, face_detector_width, 3)) + detect_vision_frame[:temp_vision_frame.shape[0], :temp_vision_frame.shape[1], :] = temp_vision_frame + detect_vision_frame = (detect_vision_frame - 127.5) / 128.0 + detect_vision_frame = numpy.expand_dims(detect_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) + return detect_vision_frame + + +def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBox], face_landmark_5_list : List[FaceLandmark5], score_list : List[Score]) -> List[Face]: + faces = [] + if facefusion.globals.face_detector_score > 0: + sort_indices = numpy.argsort(-numpy.array(score_list)) + bounding_box_list = [ bounding_box_list[index] for index in sort_indices ] + face_landmark_5_list = [face_landmark_5_list[index] for index in sort_indices] + score_list = [ score_list[index] for index in sort_indices ] + iou_threshold = 0.1 if facefusion.globals.face_detector_model == 'many' else 0.4 + keep_indices = 
apply_nms(bounding_box_list, iou_threshold) + for index in keep_indices: + bounding_box = bounding_box_list[index] + face_landmark_5_68 = face_landmark_5_list[index] + face_landmark_68_5 = expand_face_landmark_68_from_5(face_landmark_5_68) + face_landmark_68 = face_landmark_68_5 + face_landmark_68_score = 0.0 + if facefusion.globals.face_landmarker_score > 0: + face_landmark_68, face_landmark_68_score = detect_face_landmark_68(vision_frame, bounding_box) + if face_landmark_68_score > facefusion.globals.face_landmarker_score: + face_landmark_5_68 = convert_face_landmark_68_to_5(face_landmark_68) + landmarks : FaceLandmarkSet =\ + { + '5': face_landmark_5_list[index], + '5/68': face_landmark_5_68, + '68': face_landmark_68, + '68/5': face_landmark_68_5 + } + scores : FaceScoreSet = \ + { + 'detector': score_list[index], + 'landmarker': face_landmark_68_score + } + embedding, normed_embedding = calc_embedding(vision_frame, landmarks.get('5/68')) + gender, age = detect_gender_age(vision_frame, bounding_box) + faces.append(Face( + bounding_box = bounding_box, + landmarks = landmarks, + scores = scores, + embedding = embedding, + normed_embedding = normed_embedding, + gender = gender, + age = age + )) + return faces + + +def calc_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5) -> Tuple[Embedding, Embedding]: + face_recognizer = get_face_analyser().get('face_recognizer') + crop_vision_frame, matrix = warp_face_by_face_landmark_5(temp_vision_frame, face_landmark_5, 'arcface_112_v2', (112, 112)) + crop_vision_frame = crop_vision_frame / 127.5 - 1 + crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) + with conditional_thread_semaphore(facefusion.globals.execution_providers): + embedding = face_recognizer.run(None, + { + face_recognizer.get_inputs()[0].name: crop_vision_frame + })[0] + embedding = embedding.ravel() + normed_embedding = embedding / numpy.linalg.norm(embedding) + return embedding, normed_embedding + + +def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[FaceLandmark68, Score]: + face_landmarker = get_face_analyser().get('face_landmarkers').get('68') + scale = 195 / numpy.subtract(bounding_box[2:], bounding_box[:2]).max() + translation = (256 - numpy.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5 + crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (256, 256)) + crop_vision_frame = cv2.cvtColor(crop_vision_frame, cv2.COLOR_RGB2Lab) + if numpy.mean(crop_vision_frame[:, :, 0]) < 30: + crop_vision_frame[:, :, 0] = cv2.createCLAHE(clipLimit = 2).apply(crop_vision_frame[:, :, 0]) + crop_vision_frame = cv2.cvtColor(crop_vision_frame, cv2.COLOR_Lab2RGB) + crop_vision_frame = crop_vision_frame.transpose(2, 0, 1).astype(numpy.float32) / 255.0 + with conditional_thread_semaphore(facefusion.globals.execution_providers): + face_landmark_68, face_heatmap = face_landmarker.run(None, + { + face_landmarker.get_inputs()[0].name: [ crop_vision_frame ] + }) + face_landmark_68 = face_landmark_68[:, :, :2][0] / 64 + face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256 + face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix)) + face_landmark_68 = face_landmark_68.reshape(-1, 2) + face_landmark_68_score = numpy.amax(face_heatmap, axis = (2, 3)) + face_landmark_68_score = numpy.mean(face_landmark_68_score) + return face_landmark_68, 
face_landmark_68_score + + +def expand_face_landmark_68_from_5(face_landmark_5 : FaceLandmark5) -> FaceLandmark68: + face_landmarker = get_face_analyser().get('face_landmarkers').get('68_5') + affine_matrix = estimate_matrix_by_face_landmark_5(face_landmark_5, 'ffhq_512', (1, 1)) + face_landmark_5 = cv2.transform(face_landmark_5.reshape(1, -1, 2), affine_matrix).reshape(-1, 2) + with conditional_thread_semaphore(facefusion.globals.execution_providers): + face_landmark_68_5 = face_landmarker.run(None, + { + face_landmarker.get_inputs()[0].name: [ face_landmark_5 ] + })[0][0] + face_landmark_68_5 = cv2.transform(face_landmark_68_5.reshape(1, -1, 2), cv2.invertAffineTransform(affine_matrix)).reshape(-1, 2) + return face_landmark_68_5 + + +def detect_gender_age(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[int, int]: + gender_age = get_face_analyser().get('gender_age') + bounding_box = bounding_box.reshape(2, -1) + scale = 64 / numpy.subtract(*bounding_box[::-1]).max() + translation = 48 - bounding_box.sum(axis = 0) * scale * 0.5 + crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (96, 96)) + crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) + with conditional_thread_semaphore(facefusion.globals.execution_providers): + prediction = gender_age.run(None, + { + gender_age.get_inputs()[0].name: crop_vision_frame + })[0][0] + gender = int(numpy.argmax(prediction[:2])) + age = int(numpy.round(prediction[2] * 100)) + return gender, age + + +def get_one_face(vision_frame : VisionFrame, position : int = 0) -> Optional[Face]: + many_faces = get_many_faces(vision_frame) + if many_faces: + try: + return many_faces[position] + except IndexError: + return many_faces[-1] + return None + + +def get_average_face(vision_frames : List[VisionFrame], position : int = 0) -> Optional[Face]: + average_face = None + faces = [] + embedding_list = [] + normed_embedding_list = [] + + for vision_frame in vision_frames: + face = get_one_face(vision_frame, position) + if face: + faces.append(face) + embedding_list.append(face.embedding) + normed_embedding_list.append(face.normed_embedding) + if faces: + first_face = get_first(faces) + average_face = Face( + bounding_box = first_face.bounding_box, + landmarks = first_face.landmarks, + scores = first_face.scores, + embedding = numpy.mean(embedding_list, axis = 0), + normed_embedding = numpy.mean(normed_embedding_list, axis = 0), + gender = first_face.gender, + age = first_face.age + ) + return average_face + + +def get_many_faces(vision_frame : VisionFrame) -> List[Face]: + faces = [] + try: + faces_cache = get_static_faces(vision_frame) + if faces_cache: + faces = faces_cache + else: + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + if facefusion.globals.face_detector_model in [ 'many', 'retinaface']: + bounding_box_list_retinaface, face_landmark_5_list_retinaface, score_list_retinaface = detect_with_retinaface(vision_frame, facefusion.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_retinaface) + face_landmark_5_list.extend(face_landmark_5_list_retinaface) + score_list.extend(score_list_retinaface) + if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]: + bounding_box_list_scrfd, face_landmark_5_list_scrfd, score_list_scrfd = detect_with_scrfd(vision_frame, facefusion.globals.face_detector_size) + 
bounding_box_list.extend(bounding_box_list_scrfd) + face_landmark_5_list.extend(face_landmark_5_list_scrfd) + score_list.extend(score_list_scrfd) + if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]: + bounding_box_list_yoloface, face_landmark_5_list_yoloface, score_list_yoloface = detect_with_yoloface(vision_frame, facefusion.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_yoloface) + face_landmark_5_list.extend(face_landmark_5_list_yoloface) + score_list.extend(score_list_yoloface) + if facefusion.globals.face_detector_model in [ 'yunet' ]: + bounding_box_list_yunet, face_landmark_5_list_yunet, score_list_yunet = detect_with_yunet(vision_frame, facefusion.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_yunet) + face_landmark_5_list.extend(face_landmark_5_list_yunet) + score_list.extend(score_list_yunet) + if bounding_box_list and face_landmark_5_list and score_list: + faces = create_faces(vision_frame, bounding_box_list, face_landmark_5_list, score_list) + if faces: + set_static_faces(vision_frame, faces) + if facefusion.globals.face_analyser_order: + faces = sort_by_order(faces, facefusion.globals.face_analyser_order) + if facefusion.globals.face_analyser_age: + faces = filter_by_age(faces, facefusion.globals.face_analyser_age) + if facefusion.globals.face_analyser_gender: + faces = filter_by_gender(faces, facefusion.globals.face_analyser_gender) + except (AttributeError, ValueError): + pass + return faces + + +def find_similar_faces(reference_faces : FaceSet, vision_frame : VisionFrame, face_distance : float) -> List[Face]: + similar_faces : List[Face] = [] + many_faces = get_many_faces(vision_frame) + + if reference_faces: + for reference_set in reference_faces: + if not similar_faces: + for reference_face in reference_faces[reference_set]: + for face in many_faces: + if compare_faces(face, reference_face, face_distance): + similar_faces.append(face) + return similar_faces + + +def compare_faces(face : Face, reference_face : Face, face_distance : float) -> bool: + current_face_distance = calc_face_distance(face, reference_face) + return current_face_distance < face_distance + + +def calc_face_distance(face : Face, reference_face : Face) -> float: + if hasattr(face, 'normed_embedding') and hasattr(reference_face, 'normed_embedding'): + return 1 - numpy.dot(face.normed_embedding, reference_face.normed_embedding) + return 0 + + +def sort_by_order(faces : List[Face], order : FaceAnalyserOrder) -> List[Face]: + if order == 'left-right': + return sorted(faces, key = lambda face: face.bounding_box[0]) + if order == 'right-left': + return sorted(faces, key = lambda face: face.bounding_box[0], reverse = True) + if order == 'top-bottom': + return sorted(faces, key = lambda face: face.bounding_box[1]) + if order == 'bottom-top': + return sorted(faces, key = lambda face: face.bounding_box[1], reverse = True) + if order == 'small-large': + return sorted(faces, key = lambda face: (face.bounding_box[2] - face.bounding_box[0]) * (face.bounding_box[3] - face.bounding_box[1])) + if order == 'large-small': + return sorted(faces, key = lambda face: (face.bounding_box[2] - face.bounding_box[0]) * (face.bounding_box[3] - face.bounding_box[1]), reverse = True) + if order == 'best-worst': + return sorted(faces, key = lambda face: face.scores.get('detector'), reverse = True) + if order == 'worst-best': + return sorted(faces, key = lambda face: face.scores.get('detector')) + return faces + + +def filter_by_age(faces : List[Face], age : 
FaceAnalyserAge) -> List[Face]: + filter_faces = [] + for face in faces: + if categorize_age(face.age) == age: + filter_faces.append(face) + return filter_faces + + +def filter_by_gender(faces : List[Face], gender : FaceAnalyserGender) -> List[Face]: + filter_faces = [] + for face in faces: + if categorize_gender(face.gender) == gender: + filter_faces.append(face) + return filter_faces diff --git a/facefusion/face_helper.py b/facefusion/face_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..83eff56e574e1f89cfe6955731dca72e1fce9d8b --- /dev/null +++ b/facefusion/face_helper.py @@ -0,0 +1,169 @@ +from typing import Any, Tuple, List +from cv2.typing import Size +from functools import lru_cache +import cv2 +import numpy + +from facefusion.typing import BoundingBox, FaceLandmark5, FaceLandmark68, VisionFrame, Mask, Matrix, Translation, WarpTemplate, WarpTemplateSet, FaceAnalyserAge, FaceAnalyserGender + +WARP_TEMPLATES : WarpTemplateSet =\ +{ + 'arcface_112_v1': numpy.array( + [ + [ 0.35473214, 0.45658929 ], + [ 0.64526786, 0.45658929 ], + [ 0.50000000, 0.61154464 ], + [ 0.37913393, 0.77687500 ], + [ 0.62086607, 0.77687500 ] + ]), + 'arcface_112_v2': numpy.array( + [ + [ 0.34191607, 0.46157411 ], + [ 0.65653393, 0.45983393 ], + [ 0.50022500, 0.64050536 ], + [ 0.37097589, 0.82469196 ], + [ 0.63151696, 0.82325089 ] + ]), + 'arcface_128_v2': numpy.array( + [ + [ 0.36167656, 0.40387734 ], + [ 0.63696719, 0.40235469 ], + [ 0.50019687, 0.56044219 ], + [ 0.38710391, 0.72160547 ], + [ 0.61507734, 0.72034453 ] + ]), + 'ffhq_512': numpy.array( + [ + [ 0.37691676, 0.46864664 ], + [ 0.62285697, 0.46912813 ], + [ 0.50123859, 0.61331904 ], + [ 0.39308822, 0.72541100 ], + [ 0.61150205, 0.72490465 ] + ]) +} + + +def estimate_matrix_by_face_landmark_5(face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Matrix: + normed_warp_template = WARP_TEMPLATES.get(warp_template) * crop_size + affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_warp_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] + return affine_matrix + + +def warp_face_by_face_landmark_5(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + affine_matrix = estimate_matrix_by_face_landmark_5(face_landmark_5, warp_template, crop_size) + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA) + return crop_vision_frame, affine_matrix + + +def warp_face_by_bounding_box(temp_vision_frame : VisionFrame, bounding_box : BoundingBox, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + source_points = numpy.array([ [ bounding_box[0], bounding_box[1] ], [bounding_box[2], bounding_box[1] ], [ bounding_box[0], bounding_box[3] ] ]).astype(numpy.float32) + target_points = numpy.array([ [ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ] ]).astype(numpy.float32) + affine_matrix = cv2.getAffineTransform(source_points, target_points) + if bounding_box[2] - bounding_box[0] > crop_size[0] or bounding_box[3] - bounding_box[1] > crop_size[1]: + interpolation_method = cv2.INTER_AREA + else: + interpolation_method = cv2.INTER_LINEAR + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, flags = interpolation_method) + return crop_vision_frame, affine_matrix + + +def warp_face_by_translation(temp_vision_frame : VisionFrame, translation : Translation, scale : float, crop_size : Size) -> 
Tuple[VisionFrame, Matrix]: + affine_matrix = numpy.array([ [ scale, 0, translation[0] ], [ 0, scale, translation[1] ] ]) + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size) + return crop_vision_frame, affine_matrix + + +def paste_back(temp_vision_frame : VisionFrame, crop_vision_frame : VisionFrame, crop_mask : Mask, affine_matrix : Matrix) -> VisionFrame: + inverse_matrix = cv2.invertAffineTransform(affine_matrix) + temp_size = temp_vision_frame.shape[:2][::-1] + inverse_mask = cv2.warpAffine(crop_mask, inverse_matrix, temp_size).clip(0, 1) + inverse_vision_frame = cv2.warpAffine(crop_vision_frame, inverse_matrix, temp_size, borderMode = cv2.BORDER_REPLICATE) + paste_vision_frame = temp_vision_frame.copy() + paste_vision_frame[:, :, 0] = inverse_mask * inverse_vision_frame[:, :, 0] + (1 - inverse_mask) * temp_vision_frame[:, :, 0] + paste_vision_frame[:, :, 1] = inverse_mask * inverse_vision_frame[:, :, 1] + (1 - inverse_mask) * temp_vision_frame[:, :, 1] + paste_vision_frame[:, :, 2] = inverse_mask * inverse_vision_frame[:, :, 2] + (1 - inverse_mask) * temp_vision_frame[:, :, 2] + return paste_vision_frame + + +@lru_cache(maxsize = None) +def create_static_anchors(feature_stride : int, anchor_total : int, stride_height : int, stride_width : int) -> numpy.ndarray[Any, Any]: + y, x = numpy.mgrid[:stride_height, :stride_width][::-1] + anchors = numpy.stack((y, x), axis = -1) + anchors = (anchors * feature_stride).reshape((-1, 2)) + anchors = numpy.stack([ anchors ] * anchor_total, axis = 1).reshape((-1, 2)) + return anchors + + +def create_bounding_box_from_face_landmark_68(face_landmark_68 : FaceLandmark68) -> BoundingBox: + min_x, min_y = numpy.min(face_landmark_68, axis = 0) + max_x, max_y = numpy.max(face_landmark_68, axis = 0) + bounding_box = numpy.array([ min_x, min_y, max_x, max_y ]).astype(numpy.int16) + return bounding_box + + +def distance_to_bounding_box(points : numpy.ndarray[Any, Any], distance : numpy.ndarray[Any, Any]) -> BoundingBox: + x1 = points[:, 0] - distance[:, 0] + y1 = points[:, 1] - distance[:, 1] + x2 = points[:, 0] + distance[:, 2] + y2 = points[:, 1] + distance[:, 3] + bounding_box = numpy.column_stack([ x1, y1, x2, y2 ]) + return bounding_box + + +def distance_to_face_landmark_5(points : numpy.ndarray[Any, Any], distance : numpy.ndarray[Any, Any]) -> FaceLandmark5: + x = points[:, 0::2] + distance[:, 0::2] + y = points[:, 1::2] + distance[:, 1::2] + face_landmark_5 = numpy.stack((x, y), axis = -1) + return face_landmark_5 + + +def convert_face_landmark_68_to_5(face_landmark_68 : FaceLandmark68) -> FaceLandmark5: + face_landmark_5 = numpy.array( + [ + numpy.mean(face_landmark_68[36:42], axis = 0), + numpy.mean(face_landmark_68[42:48], axis = 0), + face_landmark_68[30], + face_landmark_68[48], + face_landmark_68[54] + ]) + return face_landmark_5 + + +def apply_nms(bounding_box_list : List[BoundingBox], iou_threshold : float) -> List[int]: + keep_indices = [] + dimension_list = numpy.reshape(bounding_box_list, (-1, 4)) + x1 = dimension_list[:, 0] + y1 = dimension_list[:, 1] + x2 = dimension_list[:, 2] + y2 = dimension_list[:, 3] + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + indices = numpy.arange(len(bounding_box_list)) + while indices.size > 0: + index = indices[0] + remain_indices = indices[1:] + keep_indices.append(index) + xx1 = numpy.maximum(x1[index], x1[remain_indices]) + yy1 = numpy.maximum(y1[index], y1[remain_indices]) + xx2 = numpy.minimum(x2[index], x2[remain_indices]) + yy2 = numpy.minimum(y2[index], y2[remain_indices]) + 
width = numpy.maximum(0, xx2 - xx1 + 1) + height = numpy.maximum(0, yy2 - yy1 + 1) + iou = width * height / (areas[index] + areas[remain_indices] - width * height) + indices = indices[numpy.where(iou <= iou_threshold)[0] + 1] + return keep_indices + + +def categorize_age(age : int) -> FaceAnalyserAge: + if age < 13: + return 'child' + elif age < 19: + return 'teen' + elif age < 60: + return 'adult' + return 'senior' + + +def categorize_gender(gender : int) -> FaceAnalyserGender: + if gender == 0: + return 'female' + return 'male' diff --git a/facefusion/face_masker.py b/facefusion/face_masker.py new file mode 100644 index 0000000000000000000000000000000000000000..8ae3cb1b9c920b727c0fac20c24284ce284c26be --- /dev/null +++ b/facefusion/face_masker.py @@ -0,0 +1,155 @@ +from typing import Any, Dict, List +from cv2.typing import Size +from functools import lru_cache +from time import sleep +import cv2 +import numpy +import onnxruntime + +import facefusion.globals +from facefusion import process_manager +from facefusion.thread_helper import thread_lock, conditional_thread_semaphore +from facefusion.typing import FaceLandmark68, VisionFrame, Mask, Padding, FaceMaskRegion, ModelSet +from facefusion.execution import apply_execution_provider_options +from facefusion.filesystem import resolve_relative_path, is_file +from facefusion.download import conditional_download + +FACE_OCCLUDER = None +FACE_PARSER = None +MODELS : ModelSet =\ +{ + 'face_occluder': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/face_occluder.onnx', + 'path': resolve_relative_path('../.assets/models/face_occluder.onnx') + }, + 'face_parser': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/face_parser.onnx', + 'path': resolve_relative_path('../.assets/models/face_parser.onnx') + } +} +FACE_MASK_REGIONS : Dict[FaceMaskRegion, int] =\ +{ + 'skin': 1, + 'left-eyebrow': 2, + 'right-eyebrow': 3, + 'left-eye': 4, + 'right-eye': 5, + 'glasses': 6, + 'nose': 10, + 'mouth': 11, + 'upper-lip': 12, + 'lower-lip': 13 +} + + +def get_face_occluder() -> Any: + global FACE_OCCLUDER + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FACE_OCCLUDER is None: + model_path = MODELS.get('face_occluder').get('path') + FACE_OCCLUDER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FACE_OCCLUDER + + +def get_face_parser() -> Any: + global FACE_PARSER + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FACE_PARSER is None: + model_path = MODELS.get('face_parser').get('path') + FACE_PARSER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FACE_PARSER + + +def clear_face_occluder() -> None: + global FACE_OCCLUDER + + FACE_OCCLUDER = None + + +def clear_face_parser() -> None: + global FACE_PARSER + + FACE_PARSER = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_urls =\ + [ + MODELS.get('face_occluder').get('url'), + MODELS.get('face_parser').get('url') + ] + model_paths =\ + [ + MODELS.get('face_occluder').get('path'), + MODELS.get('face_parser').get('path') + ] + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, model_urls) + process_manager.end() + return all(is_file(model_path) for model_path in model_paths) + + 
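A quick illustration of the `apply_nms()` helper defined in `facefusion/face_helper.py` above: it walks the boxes in their given order and keeps the first box of every overlapping group, which is why `create_faces()` sorts detections by descending score before calling it (the looser 0.1 threshold in `many` mode presumably accounts for the same face being reported by several detectors). The snippet below is only a toy sketch with made-up coordinates and assumes the package from this diff is importable:

```python
import numpy
from facefusion.face_helper import apply_nms

# boxes are [ x1, y1, x2, y2 ]; the first two overlap almost entirely (IoU ~0.92),
# the third sits far away, so with an IoU threshold of 0.4 only indices 0 and 2 survive
bounding_boxes =\
[
	numpy.array([ 10, 10, 110, 110 ]),
	numpy.array([ 12, 12, 112, 112 ]),
	numpy.array([ 300, 300, 380, 380 ])
]
print(apply_nms(bounding_boxes, 0.4)) # [0, 2]
```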
+@lru_cache(maxsize = None) +def create_static_box_mask(crop_size : Size, face_mask_blur : float, face_mask_padding : Padding) -> Mask: + blur_amount = int(crop_size[0] * 0.5 * face_mask_blur) + blur_area = max(blur_amount // 2, 1) + box_mask : Mask = numpy.ones(crop_size, numpy.float32) + box_mask[:max(blur_area, int(crop_size[1] * face_mask_padding[0] / 100)), :] = 0 + box_mask[-max(blur_area, int(crop_size[1] * face_mask_padding[2] / 100)):, :] = 0 + box_mask[:, :max(blur_area, int(crop_size[0] * face_mask_padding[3] / 100))] = 0 + box_mask[:, -max(blur_area, int(crop_size[0] * face_mask_padding[1] / 100)):] = 0 + if blur_amount > 0: + box_mask = cv2.GaussianBlur(box_mask, (0, 0), blur_amount * 0.25) + return box_mask + + +def create_occlusion_mask(crop_vision_frame : VisionFrame) -> Mask: + face_occluder = get_face_occluder() + prepare_vision_frame = cv2.resize(crop_vision_frame, face_occluder.get_inputs()[0].shape[1:3][::-1]) + prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32) / 255 + prepare_vision_frame = prepare_vision_frame.transpose(0, 1, 2, 3) + with conditional_thread_semaphore(facefusion.globals.execution_providers): + occlusion_mask : Mask = face_occluder.run(None, + { + face_occluder.get_inputs()[0].name: prepare_vision_frame + })[0][0] + occlusion_mask = occlusion_mask.transpose(0, 1, 2).clip(0, 1).astype(numpy.float32) + occlusion_mask = cv2.resize(occlusion_mask, crop_vision_frame.shape[:2][::-1]) + occlusion_mask = (cv2.GaussianBlur(occlusion_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2 + return occlusion_mask + + +def create_region_mask(crop_vision_frame : VisionFrame, face_mask_regions : List[FaceMaskRegion]) -> Mask: + face_parser = get_face_parser() + prepare_vision_frame = cv2.flip(cv2.resize(crop_vision_frame, (512, 512)), 1) + prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32)[:, :, ::-1] / 127.5 - 1 + prepare_vision_frame = prepare_vision_frame.transpose(0, 3, 1, 2) + with conditional_thread_semaphore(facefusion.globals.execution_providers): + region_mask : Mask = face_parser.run(None, + { + face_parser.get_inputs()[0].name: prepare_vision_frame + })[0][0] + region_mask = numpy.isin(region_mask.argmax(0), [ FACE_MASK_REGIONS[region] for region in face_mask_regions ]) + region_mask = cv2.resize(region_mask.astype(numpy.float32), crop_vision_frame.shape[:2][::-1]) + region_mask = (cv2.GaussianBlur(region_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2 + return region_mask + + +def create_mouth_mask(face_landmark_68 : FaceLandmark68) -> Mask: + convex_hull = cv2.convexHull(face_landmark_68[numpy.r_[3:14, 31:36]].astype(numpy.int32)) + mouth_mask : Mask = numpy.zeros((512, 512)).astype(numpy.float32) + mouth_mask = cv2.fillConvexPoly(mouth_mask, convex_hull, 1.0) + mouth_mask = cv2.erode(mouth_mask.clip(0, 1), numpy.ones((21, 3))) + mouth_mask = cv2.GaussianBlur(mouth_mask, (0, 0), sigmaX = 1, sigmaY = 15) + return mouth_mask diff --git a/facefusion/face_store.py b/facefusion/face_store.py new file mode 100644 index 0000000000000000000000000000000000000000..7540bc9d72f863772a0304b197b0805816fe2808 --- /dev/null +++ b/facefusion/face_store.py @@ -0,0 +1,48 @@ +from typing import Optional, List +import hashlib +import numpy + +from facefusion.typing import VisionFrame, Face, FaceStore, FaceSet + +FACE_STORE: FaceStore =\ +{ + 'static_faces': {}, + 'reference_faces': {} +} + + +def get_static_faces(vision_frame : VisionFrame) -> Optional[List[Face]]: + frame_hash = 
create_frame_hash(vision_frame) + if frame_hash in FACE_STORE['static_faces']: + return FACE_STORE['static_faces'][frame_hash] + return None + + +def set_static_faces(vision_frame : VisionFrame, faces : List[Face]) -> None: + frame_hash = create_frame_hash(vision_frame) + if frame_hash: + FACE_STORE['static_faces'][frame_hash] = faces + + +def clear_static_faces() -> None: + FACE_STORE['static_faces'] = {} + + +def create_frame_hash(vision_frame : VisionFrame) -> Optional[str]: + return hashlib.sha1(vision_frame.tobytes()).hexdigest() if numpy.any(vision_frame) else None + + +def get_reference_faces() -> Optional[FaceSet]: + if FACE_STORE['reference_faces']: + return FACE_STORE['reference_faces'] + return None + + +def append_reference_face(name : str, face : Face) -> None: + if name not in FACE_STORE['reference_faces']: + FACE_STORE['reference_faces'][name] = [] + FACE_STORE['reference_faces'][name].append(face) + + +def clear_reference_faces() -> None: + FACE_STORE['reference_faces'] = {} diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py new file mode 100644 index 0000000000000000000000000000000000000000..1a0b1286cbb811c10f86f755c23f8ba058ae25d5 --- /dev/null +++ b/facefusion/ffmpeg.py @@ -0,0 +1,144 @@ +from typing import List, Optional +import os +import subprocess +import filetype + +import facefusion.globals +from facefusion import logger, process_manager +from facefusion.typing import OutputVideoPreset, Fps, AudioBuffer +from facefusion.filesystem import get_temp_frames_pattern, get_temp_output_video_path +from facefusion.vision import restrict_video_fps + + +def run_ffmpeg(args : List[str]) -> bool: + commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error' ] + commands.extend(args) + process = subprocess.Popen(commands, stderr = subprocess.PIPE, stdout = subprocess.PIPE) + + while process_manager.is_processing(): + try: + if facefusion.globals.log_level == 'debug': + log_debug(process) + return process.wait(timeout = 0.5) == 0 + except subprocess.TimeoutExpired: + continue + return process.returncode == 0 + + +def open_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]: + commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'quiet' ] + commands.extend(args) + return subprocess.Popen(commands, stdin = subprocess.PIPE, stdout = subprocess.PIPE) + + +def log_debug(process : subprocess.Popen[bytes]) -> None: + _, stderr = process.communicate() + errors = stderr.decode().split(os.linesep) + + for error in errors: + if error.strip(): + logger.debug(error.strip(), __name__.upper()) + + +def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps) -> bool: + trim_frame_start = facefusion.globals.trim_frame_start + trim_frame_end = facefusion.globals.trim_frame_end + temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') + commands = [ '-i', target_path, '-s', str(temp_video_resolution), '-q:v', '0' ] + + if trim_frame_start is not None and trim_frame_end is not None: + commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(temp_video_fps) ]) + elif trim_frame_start is not None: + commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(temp_video_fps) ]) + elif trim_frame_end is not None: + commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(temp_video_fps) ]) + else: + commands.extend([ '-vf', 'fps=' + str(temp_video_fps) ]) + commands.extend([ '-vsync', '0', temp_frames_pattern ]) + return run_ffmpeg(commands) + + 
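As a usage note for `extract_frames()` above: it only assembles the ffmpeg arguments, while the trim window and fps come straight from the globals set in `core.apply_args()`. The sketch below is hypothetical (paths, resolution, fps and trim values are invented) and mirrors what `process_video()` does around it: the temp directory must exist and `process_manager.start()` has to be called first, because `run_ffmpeg()` only waits on the child process while the manager reports processing.

```python
import facefusion.globals
from facefusion import process_manager
from facefusion.filesystem import create_temp
from facefusion.ffmpeg import extract_frames

# hypothetical settings; in the CLI these come from core.apply_args()
facefusion.globals.trim_frame_start = 10
facefusion.globals.trim_frame_end = 90
facefusion.globals.temp_frame_format = 'jpg'
facefusion.globals.log_level = 'info'

create_temp('target.mp4')
process_manager.start()
# roughly equivalent to:
# ffmpeg -hide_banner -loglevel error -i target.mp4 -s 1280x720 -q:v 0 \
#        -vf trim=start_frame=10:end_frame=90,fps=25 -vsync 0 <tempdir>/facefusion/target/%04d.jpg
extract_frames('target.mp4', '1280x720', 25)
process_manager.end()
```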
+def merge_video(target_path : str, output_video_resolution : str, output_video_fps : Fps) -> bool: + temp_video_fps = restrict_video_fps(target_path, output_video_fps) + temp_output_video_path = get_temp_output_video_path(target_path) + temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') + commands = [ '-r', str(temp_video_fps), '-i', temp_frames_pattern, '-s', str(output_video_resolution), '-c:v', facefusion.globals.output_video_encoder ] + + if facefusion.globals.output_video_encoder in [ 'libx264', 'libx265' ]: + output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) + commands.extend([ '-crf', str(output_video_compression), '-preset', facefusion.globals.output_video_preset ]) + if facefusion.globals.output_video_encoder in [ 'libvpx-vp9' ]: + output_video_compression = round(63 - (facefusion.globals.output_video_quality * 0.63)) + commands.extend([ '-crf', str(output_video_compression) ]) + if facefusion.globals.output_video_encoder in [ 'h264_nvenc', 'hevc_nvenc' ]: + output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) + commands.extend([ '-cq', str(output_video_compression), '-preset', map_nvenc_preset(facefusion.globals.output_video_preset) ]) + if facefusion.globals.output_video_encoder in [ 'h264_amf', 'hevc_amf' ]: + output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) + commands.extend([ '-qp_i', str(output_video_compression), '-qp_p', str(output_video_compression), '-quality', map_amf_preset(facefusion.globals.output_video_preset) ]) + commands.extend([ '-vf', 'framerate=fps=' + str(output_video_fps), '-pix_fmt', 'yuv420p', '-colorspace', 'bt709', '-y', temp_output_video_path ]) + return run_ffmpeg(commands) + + +def copy_image(target_path : str, output_path : str, temp_image_resolution : str) -> bool: + is_webp = filetype.guess_mime(target_path) == 'image/webp' + temp_image_compression = 100 if is_webp else 0 + commands = [ '-i', target_path, '-s', str(temp_image_resolution), '-q:v', str(temp_image_compression), '-y', output_path ] + return run_ffmpeg(commands) + + +def finalize_image(output_path : str, output_image_resolution : str) -> bool: + output_image_compression = round(31 - (facefusion.globals.output_image_quality * 0.31)) + commands = [ '-i', output_path, '-s', str(output_image_resolution), '-q:v', str(output_image_compression), '-y', output_path ] + return run_ffmpeg(commands) + + +def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]: + commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-'] + process = open_ffmpeg(commands) + audio_buffer, _ = process.communicate() + if process.returncode == 0: + return audio_buffer + return None + + +def restore_audio(target_path : str, output_path : str, output_video_fps : Fps) -> bool: + trim_frame_start = facefusion.globals.trim_frame_start + trim_frame_end = facefusion.globals.trim_frame_end + temp_output_video_path = get_temp_output_video_path(target_path) + commands = [ '-i', temp_output_video_path ] + + if trim_frame_start is not None: + start_time = trim_frame_start / output_video_fps + commands.extend([ '-ss', str(start_time) ]) + if trim_frame_end is not None: + end_time = trim_frame_end / output_video_fps + commands.extend([ '-to', str(end_time) ]) + commands.extend([ '-i', target_path, '-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-y', output_path ]) + 
return run_ffmpeg(commands) + + +def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool: + temp_output_path = get_temp_output_video_path(target_path) + commands = [ '-i', temp_output_path, '-i', audio_path, '-af', 'apad', '-shortest', '-y', output_path ] + return run_ffmpeg(commands) + + +def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: + if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]: + return 'fast' + if output_video_preset == 'medium': + return 'medium' + if output_video_preset in [ 'slow', 'slower', 'veryslow' ]: + return 'slow' + return None + + +def map_amf_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: + if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast' ]: + return 'speed' + if output_video_preset in [ 'faster', 'fast', 'medium' ]: + return 'balanced' + if output_video_preset in [ 'slow', 'slower', 'veryslow' ]: + return 'quality' + return None diff --git a/facefusion/filesystem.py b/facefusion/filesystem.py new file mode 100644 index 0000000000000000000000000000000000000000..61009838dbadd72bfcc0d203d4c4c4e22139a5e2 --- /dev/null +++ b/facefusion/filesystem.py @@ -0,0 +1,109 @@ +from typing import List, Optional +import glob +import os +import shutil +import tempfile +import filetype +from pathlib import Path + +import facefusion.globals + +TEMP_DIRECTORY_PATH = os.path.join(tempfile.gettempdir(), 'facefusion') +TEMP_OUTPUT_VIDEO_NAME = 'temp.mp4' + + +def get_temp_frame_paths(target_path : str) -> List[str]: + temp_frames_pattern = get_temp_frames_pattern(target_path, '*') + return sorted(glob.glob(temp_frames_pattern)) + + +def get_temp_frames_pattern(target_path : str, temp_frame_prefix : str) -> str: + temp_directory_path = get_temp_directory_path(target_path) + return os.path.join(temp_directory_path, temp_frame_prefix + '.' 
+ facefusion.globals.temp_frame_format) + + +def get_temp_directory_path(target_path : str) -> str: + target_name, _ = os.path.splitext(os.path.basename(target_path)) + return os.path.join(TEMP_DIRECTORY_PATH, target_name) + + +def get_temp_output_video_path(target_path : str) -> str: + temp_directory_path = get_temp_directory_path(target_path) + return os.path.join(temp_directory_path, TEMP_OUTPUT_VIDEO_NAME) + + +def create_temp(target_path : str) -> None: + temp_directory_path = get_temp_directory_path(target_path) + Path(temp_directory_path).mkdir(parents = True, exist_ok = True) + + +def move_temp(target_path : str, output_path : str) -> None: + temp_output_video_path = get_temp_output_video_path(target_path) + if is_file(temp_output_video_path): + if is_file(output_path): + os.remove(output_path) + shutil.move(temp_output_video_path, output_path) + + +def clear_temp(target_path : str) -> None: + temp_directory_path = get_temp_directory_path(target_path) + parent_directory_path = os.path.dirname(temp_directory_path) + if not facefusion.globals.keep_temp and is_directory(temp_directory_path): + shutil.rmtree(temp_directory_path, ignore_errors = True) + if os.path.exists(parent_directory_path) and not os.listdir(parent_directory_path): + os.rmdir(parent_directory_path) + + +def is_file(file_path : str) -> bool: + return bool(file_path and os.path.isfile(file_path)) + + +def is_directory(directory_path : str) -> bool: + return bool(directory_path and os.path.isdir(directory_path)) + + +def is_audio(audio_path : str) -> bool: + return is_file(audio_path) and filetype.helpers.is_audio(audio_path) + + +def has_audio(audio_paths : List[str]) -> bool: + if audio_paths: + return any(is_audio(audio_path) for audio_path in audio_paths) + return False + + +def is_image(image_path : str) -> bool: + return is_file(image_path) and filetype.helpers.is_image(image_path) + + +def has_image(image_paths: List[str]) -> bool: + if image_paths: + return any(is_image(image_path) for image_path in image_paths) + return False + + +def is_video(video_path : str) -> bool: + return is_file(video_path) and filetype.helpers.is_video(video_path) + + +def filter_audio_paths(paths : List[str]) -> List[str]: + if paths: + return [ path for path in paths if is_audio(path) ] + return [] + + +def filter_image_paths(paths : List[str]) -> List[str]: + if paths: + return [ path for path in paths if is_image(path) ] + return [] + + +def resolve_relative_path(path : str) -> str: + return os.path.abspath(os.path.join(os.path.dirname(__file__), path)) + + +def list_directory(directory_path : str) -> Optional[List[str]]: + if is_directory(directory_path): + files = os.listdir(directory_path) + return sorted([ Path(file).stem for file in files if not Path(file).stem.startswith(('.', '__')) ]) + return None diff --git a/facefusion/globals.py b/facefusion/globals.py new file mode 100644 index 0000000000000000000000000000000000000000..d736880a77e1c3a08b56b1cd26b9425641d9bc5a --- /dev/null +++ b/facefusion/globals.py @@ -0,0 +1,57 @@ +from typing import List, Optional + +from facefusion.typing import LogLevel, VideoMemoryStrategy, FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceMaskType, FaceMaskRegion, OutputVideoEncoder, OutputVideoPreset, FaceDetectorModel, FaceRecognizerModel, TempFrameFormat, Padding + +# general +source_paths : Optional[List[str]] = None +target_path : Optional[str] = None +output_path : Optional[str] = None +# misc +force_download : Optional[bool] = None +skip_download : 
Optional[bool] = None +headless : Optional[bool] = None +log_level : Optional[LogLevel] = None +# execution +execution_providers : List[str] = [] +execution_thread_count : Optional[int] = None +execution_queue_count : Optional[int] = None +# memory +video_memory_strategy : Optional[VideoMemoryStrategy] = None +system_memory_limit : Optional[int] = None +# face analyser +face_analyser_order : Optional[FaceAnalyserOrder] = None +face_analyser_age : Optional[FaceAnalyserAge] = None +face_analyser_gender : Optional[FaceAnalyserGender] = None +face_detector_model : Optional[FaceDetectorModel] = None +face_detector_size : Optional[str] = None +face_detector_score : Optional[float] = None +face_landmarker_score : Optional[float] = None +face_recognizer_model : Optional[FaceRecognizerModel] = None +# face selector +face_selector_mode : Optional[FaceSelectorMode] = None +reference_face_position : Optional[int] = None +reference_face_distance : Optional[float] = None +reference_frame_number : Optional[int] = None +# face mask +face_mask_types : Optional[List[FaceMaskType]] = None +face_mask_blur : Optional[float] = None +face_mask_padding : Optional[Padding] = None +face_mask_regions : Optional[List[FaceMaskRegion]] = None +# frame extraction +trim_frame_start : Optional[int] = None +trim_frame_end : Optional[int] = None +temp_frame_format : Optional[TempFrameFormat] = None +keep_temp : Optional[bool] = None +# output creation +output_image_quality : Optional[int] = None +output_image_resolution : Optional[str] = None +output_video_encoder : Optional[OutputVideoEncoder] = None +output_video_preset : Optional[OutputVideoPreset] = None +output_video_quality : Optional[int] = None +output_video_resolution : Optional[str] = None +output_video_fps : Optional[float] = None +skip_audio : Optional[bool] = None +# frame processors +frame_processors : List[str] = [] +# uis +ui_layouts : List[str] = [] diff --git a/facefusion/installer.py b/facefusion/installer.py new file mode 100644 index 0000000000000000000000000000000000000000..9c2b4312bf6ef8e4ad9615b391f3534397944e8b --- /dev/null +++ b/facefusion/installer.py @@ -0,0 +1,77 @@ +from typing import Dict, Tuple +import sys +import os +import platform +import tempfile +import subprocess +import inquirer +from argparse import ArgumentParser, HelpFormatter + +from facefusion import metadata, wording + +if platform.system().lower() == 'darwin': + os.environ['SYSTEM_VERSION_COMPAT'] = '0' + +ONNXRUNTIMES : Dict[str, Tuple[str, str]] = {} + +if platform.system().lower() == 'darwin': + ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.1') +else: + ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.1') + ONNXRUNTIMES['cuda-12.2'] = ('onnxruntime-gpu', '1.17.1') + ONNXRUNTIMES['cuda-11.8'] = ('onnxruntime-gpu', '1.17.1') + ONNXRUNTIMES['openvino'] = ('onnxruntime-openvino', '1.17.1') +if platform.system().lower() == 'linux': + ONNXRUNTIMES['rocm-5.4.2'] = ('onnxruntime-rocm', '1.16.3') + ONNXRUNTIMES['rocm-5.6'] = ('onnxruntime-rocm', '1.16.3') +if platform.system().lower() == 'windows': + ONNXRUNTIMES['directml'] = ('onnxruntime-directml', '1.17.1') + + +def cli() -> None: + program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 130)) + program.add_argument('--onnxruntime', help = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = ONNXRUNTIMES.keys()) + program.add_argument('--skip-conda', help = wording.get('help.skip_conda'), action = 'store_true') + program.add_argument('-v', '--version', 
version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') + run(program) + + +def run(program : ArgumentParser) -> None: + args = program.parse_args() + python_id = 'cp' + str(sys.version_info.major) + str(sys.version_info.minor) + + if not args.skip_conda and 'CONDA_PREFIX' not in os.environ: + sys.stdout.write(wording.get('conda_not_activated') + os.linesep) + sys.exit(1) + if args.onnxruntime: + answers =\ + { + 'onnxruntime': args.onnxruntime + } + else: + answers = inquirer.prompt( + [ + inquirer.List('onnxruntime', message = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = list(ONNXRUNTIMES.keys())) + ]) + if answers: + onnxruntime = answers['onnxruntime'] + onnxruntime_name, onnxruntime_version = ONNXRUNTIMES[onnxruntime] + + subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--force-reinstall' ]) + if onnxruntime == 'rocm-5.4.2' or onnxruntime == 'rocm-5.6': + if python_id in [ 'cp39', 'cp310', 'cp311' ]: + rocm_version = onnxruntime.replace('-', '') + rocm_version = rocm_version.replace('.', '') + wheel_name = 'onnxruntime_training-' + onnxruntime_version + '+' + rocm_version + '-' + python_id + '-' + python_id + '-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' + wheel_path = os.path.join(tempfile.gettempdir(), wheel_name) + wheel_url = 'https://download.onnxruntime.ai/' + wheel_name + subprocess.call([ 'curl', '--silent', '--location', '--continue-at', '-', '--output', wheel_path, wheel_url ]) + subprocess.call([ 'pip', 'uninstall', wheel_path, '-y', '-q' ]) + subprocess.call([ 'pip', 'install', wheel_path, '--force-reinstall' ]) + os.remove(wheel_path) + else: + subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ]) + if onnxruntime == 'cuda-12.2': + subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--extra-index-url', 'https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple', '--force-reinstall' ]) + else: + subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--force-reinstall' ]) diff --git a/facefusion/logger.py b/facefusion/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..e6e3ba08733ed0ef654a2cb51c048eecaff7813b --- /dev/null +++ b/facefusion/logger.py @@ -0,0 +1,47 @@ +from typing import Dict +from logging import basicConfig, getLogger, Logger, DEBUG, INFO, WARNING, ERROR + +from facefusion.typing import LogLevel + + +def init(log_level : LogLevel) -> None: + basicConfig(format = None) + get_package_logger().setLevel(get_log_levels()[log_level]) + + +def get_package_logger() -> Logger: + return getLogger('facefusion') + + +def debug(message : str, scope : str) -> None: + get_package_logger().debug('[' + scope + '] ' + message) + + +def info(message : str, scope : str) -> None: + get_package_logger().info('[' + scope + '] ' + message) + + +def warn(message : str, scope : str) -> None: + get_package_logger().warning('[' + scope + '] ' + message) + + +def error(message : str, scope : str) -> None: + get_package_logger().error('[' + scope + '] ' + message) + + +def enable() -> None: + get_package_logger().disabled = False + + +def disable() -> None: + get_package_logger().disabled = True + + +def get_log_levels() -> Dict[LogLevel, int]: + return\ + { + 'error': ERROR, + 'warn': WARNING, + 'info': INFO, + 'debug': DEBUG + } diff --git a/facefusion/memory.py b/facefusion/memory.py new file mode 100644 index 
0000000000000000000000000000000000000000..845c7d3c7d283214428ead2d1b8a4ed53fea2687 --- /dev/null +++ b/facefusion/memory.py @@ -0,0 +1,21 @@ +import platform + +if platform.system().lower() == 'windows': + import ctypes +else: + import resource + + +def limit_system_memory(system_memory_limit : int = 1) -> bool: + if platform.system().lower() == 'darwin': + system_memory_limit = system_memory_limit * (1024 ** 6) + else: + system_memory_limit = system_memory_limit * (1024 ** 3) + try: + if platform.system().lower() == 'windows': + ctypes.windll.kernel32.SetProcessWorkingSetSize(-1, ctypes.c_size_t(system_memory_limit), ctypes.c_size_t(system_memory_limit)) # type: ignore[attr-defined] + else: + resource.setrlimit(resource.RLIMIT_DATA, (system_memory_limit, system_memory_limit)) + return True + except Exception: + return False diff --git a/facefusion/metadata.py b/facefusion/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..b4688497fd19d2278d7918e101226b6bcc441c14 --- /dev/null +++ b/facefusion/metadata.py @@ -0,0 +1,13 @@ +METADATA =\ +{ + 'name': 'FaceFusion', + 'description': 'Next generation face swapper and enhancer', + 'version': '2.5.2', + 'license': 'MIT', + 'author': 'Henry Ruhs', + 'url': 'https://facefusion.io' +} + + +def get(key : str) -> str: + return METADATA[key] diff --git a/facefusion/normalizer.py b/facefusion/normalizer.py new file mode 100644 index 0000000000000000000000000000000000000000..5324f7ec4651d8a008112715c24d9a05deabb9cd --- /dev/null +++ b/facefusion/normalizer.py @@ -0,0 +1,43 @@ +from typing import List, Optional +import hashlib +import os + +import facefusion.globals +from facefusion.filesystem import is_directory +from facefusion.typing import Padding, Fps + + +def normalize_output_path(target_path : Optional[str], output_path : Optional[str]) -> Optional[str]: + if target_path and output_path: + target_name, target_extension = os.path.splitext(os.path.basename(target_path)) + if is_directory(output_path): + output_hash = hashlib.sha1(str(facefusion.globals.__dict__).encode('utf-8')).hexdigest()[:8] + output_name = target_name + '-' + output_hash + return os.path.join(output_path, output_name + target_extension) + output_name, output_extension = os.path.splitext(os.path.basename(output_path)) + output_directory_path = os.path.dirname(output_path) + if is_directory(output_directory_path) and output_extension: + return os.path.join(output_directory_path, output_name + target_extension) + return None + + +def normalize_padding(padding : Optional[List[int]]) -> Optional[Padding]: + if padding and len(padding) == 1: + return tuple([ padding[0], padding[0], padding[0], padding[0] ]) # type: ignore[return-value] + if padding and len(padding) == 2: + return tuple([ padding[0], padding[1], padding[0], padding[1] ]) # type: ignore[return-value] + if padding and len(padding) == 3: + return tuple([ padding[0], padding[1], padding[2], padding[1] ]) # type: ignore[return-value] + if padding and len(padding) == 4: + return tuple(padding) # type: ignore[return-value] + return None + + +def normalize_fps(fps : Optional[float]) -> Optional[Fps]: + if fps is not None: + if fps < 1.0: + return 1.0 + if fps > 60.0: + return 60.0 + return fps + return None diff --git a/facefusion/process_manager.py b/facefusion/process_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..3d5cce0fbb64bf8a06a960867075b2f8376d3203 --- /dev/null +++ b/facefusion/process_manager.py @@ -0,0 +1,53 @@ +from typing import Generator, List + +from 
facefusion.typing import QueuePayload, ProcessState + +PROCESS_STATE : ProcessState = 'pending' + + +def get_process_state() -> ProcessState: + return PROCESS_STATE + + +def set_process_state(process_state : ProcessState) -> None: + global PROCESS_STATE + + PROCESS_STATE = process_state + + +def is_checking() -> bool: + return get_process_state() == 'checking' + + +def is_processing() -> bool: + return get_process_state() == 'processing' + + +def is_stopping() -> bool: + return get_process_state() == 'stopping' + + +def is_pending() -> bool: + return get_process_state() == 'pending' + + +def check() -> None: + set_process_state('checking') + + +def start() -> None: + set_process_state('processing') + + +def stop() -> None: + set_process_state('stopping') + + +def end() -> None: + set_process_state('pending') + + +def manage(queue_payloads : List[QueuePayload]) -> Generator[QueuePayload, None, None]: + for query_payload in queue_payloads: + if is_processing(): + yield query_payload diff --git a/facefusion/processors/__init__.py b/facefusion/processors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/facefusion/processors/__pycache__/__init__.cpython-310.pyc b/facefusion/processors/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1cd377db7c5090eba6ff5183db9b9dfdebd8a0af Binary files /dev/null and b/facefusion/processors/__pycache__/__init__.cpython-310.pyc differ diff --git a/facefusion/processors/frame/__init__.py b/facefusion/processors/frame/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/facefusion/processors/frame/__pycache__/__init__.cpython-310.pyc b/facefusion/processors/frame/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1d0098ac80573d5bcd730234b34942d75929b2a Binary files /dev/null and b/facefusion/processors/frame/__pycache__/__init__.cpython-310.pyc differ diff --git a/facefusion/processors/frame/__pycache__/choices.cpython-310.pyc b/facefusion/processors/frame/__pycache__/choices.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53b4abd142a6e704a7a04d63edd686167a56c507 Binary files /dev/null and b/facefusion/processors/frame/__pycache__/choices.cpython-310.pyc differ diff --git a/facefusion/processors/frame/__pycache__/core.cpython-310.pyc b/facefusion/processors/frame/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49e2e1c80cd29f57e117f61f06f316b3bf6f08b6 Binary files /dev/null and b/facefusion/processors/frame/__pycache__/core.cpython-310.pyc differ diff --git a/facefusion/processors/frame/__pycache__/globals.cpython-310.pyc b/facefusion/processors/frame/__pycache__/globals.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8386cd865afbda86994566c7386f8d3aaf6e75d7 Binary files /dev/null and b/facefusion/processors/frame/__pycache__/globals.cpython-310.pyc differ diff --git a/facefusion/processors/frame/__pycache__/typings.cpython-310.pyc b/facefusion/processors/frame/__pycache__/typings.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7585e9b20928de93da19c9eee8d1b5c11b5e0dcc Binary files /dev/null and b/facefusion/processors/frame/__pycache__/typings.cpython-310.pyc differ diff --git a/facefusion/processors/frame/choices.py 
b/facefusion/processors/frame/choices.py new file mode 100644 index 0000000000000000000000000000000000000000..7721c150413bc76c1f3d2875fad01a4fe39992ef --- /dev/null +++ b/facefusion/processors/frame/choices.py @@ -0,0 +1,16 @@ +from typing import List + +from facefusion.common_helper import create_int_range +from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel + +face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender' ] +face_enhancer_models : List[FaceEnhancerModel] = [ 'codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'gpen_bfr_1024', 'gpen_bfr_2048', 'restoreformer_plus_plus' ] +face_swapper_models : List[FaceSwapperModel] = [ 'blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256' ] +frame_colorizer_models : List[FrameColorizerModel] = [ 'ddcolor', 'ddcolor_artistic', 'deoldify', 'deoldify_artistic', 'deoldify_stable' ] +frame_colorizer_sizes : List[str] = [ '192x192', '256x256', '384x384', '512x512' ] +frame_enhancer_models : List[FrameEnhancerModel] = [ 'lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x2', 'real_esrgan_x2_fp16', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'real_hatgan_x4', 'span_kendata_x4' ] +lip_syncer_models : List[LipSyncerModel] = [ 'wav2lip_gan' ] + +face_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) +frame_colorizer_blend_range : List[int] = create_int_range(0, 100, 1) +frame_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) diff --git a/facefusion/processors/frame/core.py b/facefusion/processors/frame/core.py new file mode 100644 index 0000000000000000000000000000000000000000..8b3f12b651db766146d0142ad04aa42f8a0c766e --- /dev/null +++ b/facefusion/processors/frame/core.py @@ -0,0 +1,116 @@ +import os +import sys +import importlib +from concurrent.futures import ThreadPoolExecutor, as_completed +from queue import Queue +from types import ModuleType +from typing import Any, List +from tqdm import tqdm + +import facefusion.globals +from facefusion.typing import ProcessFrames, QueuePayload +from facefusion.execution import encode_execution_providers +from facefusion import logger, wording + +FRAME_PROCESSORS_MODULES : List[ModuleType] = [] +FRAME_PROCESSORS_METHODS =\ +[ + 'get_frame_processor', + 'clear_frame_processor', + 'get_options', + 'set_options', + 'register_args', + 'apply_args', + 'pre_check', + 'post_check', + 'pre_process', + 'post_process', + 'get_reference_frame', + 'process_frame', + 'process_frames', + 'process_image', + 'process_video' +] + + +def load_frame_processor_module(frame_processor : str) -> Any: + try: + frame_processor_module = importlib.import_module('facefusion.processors.frame.modules.' 
+ frame_processor) + for method_name in FRAME_PROCESSORS_METHODS: + if not hasattr(frame_processor_module, method_name): + raise NotImplementedError + except ModuleNotFoundError as exception: + logger.error(wording.get('frame_processor_not_loaded').format(frame_processor = frame_processor), __name__.upper()) + logger.debug(exception.msg, __name__.upper()) + sys.exit(1) + except NotImplementedError: + logger.error(wording.get('frame_processor_not_implemented').format(frame_processor = frame_processor), __name__.upper()) + sys.exit(1) + return frame_processor_module + + +def get_frame_processors_modules(frame_processors : List[str]) -> List[ModuleType]: + global FRAME_PROCESSORS_MODULES + + if not FRAME_PROCESSORS_MODULES: + for frame_processor in frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + FRAME_PROCESSORS_MODULES.append(frame_processor_module) + return FRAME_PROCESSORS_MODULES + + +def clear_frame_processors_modules() -> None: + global FRAME_PROCESSORS_MODULES + + for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + frame_processor_module.clear_frame_processor() + FRAME_PROCESSORS_MODULES = [] + + +def multi_process_frames(source_paths : List[str], temp_frame_paths : List[str], process_frames : ProcessFrames) -> None: + queue_payloads = create_queue_payloads(temp_frame_paths) + with tqdm(total = len(queue_payloads), desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: + progress.set_postfix( + { + 'execution_providers': encode_execution_providers(facefusion.globals.execution_providers), + 'execution_thread_count': facefusion.globals.execution_thread_count, + 'execution_queue_count': facefusion.globals.execution_queue_count + }) + with ThreadPoolExecutor(max_workers = facefusion.globals.execution_thread_count) as executor: + futures = [] + queue : Queue[QueuePayload] = create_queue(queue_payloads) + queue_per_future = max(len(queue_payloads) // facefusion.globals.execution_thread_count * facefusion.globals.execution_queue_count, 1) + while not queue.empty(): + future = executor.submit(process_frames, source_paths, pick_queue(queue, queue_per_future), progress.update) + futures.append(future) + for future_done in as_completed(futures): + future_done.result() + + +def create_queue(queue_payloads : List[QueuePayload]) -> Queue[QueuePayload]: + queue : Queue[QueuePayload] = Queue() + for queue_payload in queue_payloads: + queue.put(queue_payload) + return queue + + +def pick_queue(queue : Queue[QueuePayload], queue_per_future : int) -> List[QueuePayload]: + queues = [] + for _ in range(queue_per_future): + if not queue.empty(): + queues.append(queue.get()) + return queues + + +def create_queue_payloads(temp_frame_paths : List[str]) -> List[QueuePayload]: + queue_payloads = [] + temp_frame_paths = sorted(temp_frame_paths, key = os.path.basename) + + for frame_number, frame_path in enumerate(temp_frame_paths): + frame_payload : QueuePayload =\ + { + 'frame_number': frame_number, + 'frame_path': frame_path + } + queue_payloads.append(frame_payload) + return queue_payloads diff --git a/facefusion/processors/frame/globals.py b/facefusion/processors/frame/globals.py new file mode 100644 index 0000000000000000000000000000000000000000..76ab2b2a412ee9b9f779f6fd27fbb48c2ad9e0d0 --- /dev/null +++ b/facefusion/processors/frame/globals.py @@ -0,0 +1,14 @@ +from typing import List, Optional + +from 
facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel + +face_debugger_items : Optional[List[FaceDebuggerItem]] = None +face_enhancer_model : Optional[FaceEnhancerModel] = None +face_enhancer_blend : Optional[int] = None +face_swapper_model : Optional[FaceSwapperModel] = None +frame_colorizer_model : Optional[FrameColorizerModel] = None +frame_colorizer_blend : Optional[int] = None +frame_colorizer_size : Optional[str] = None +frame_enhancer_model : Optional[FrameEnhancerModel] = None +frame_enhancer_blend : Optional[int] = None +lip_syncer_model : Optional[LipSyncerModel] = None diff --git a/facefusion/processors/frame/modules/__init__.py b/facefusion/processors/frame/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/facefusion/processors/frame/modules/__pycache__/__init__.cpython-310.pyc b/facefusion/processors/frame/modules/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21aac86459cd78a49f54622e58261f97941799b6 Binary files /dev/null and b/facefusion/processors/frame/modules/__pycache__/__init__.cpython-310.pyc differ diff --git a/facefusion/processors/frame/modules/__pycache__/face_debugger.cpython-310.pyc b/facefusion/processors/frame/modules/__pycache__/face_debugger.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..edc8cf1d139d9bc977acb288e36dd52a1331cee8 Binary files /dev/null and b/facefusion/processors/frame/modules/__pycache__/face_debugger.cpython-310.pyc differ diff --git a/facefusion/processors/frame/modules/__pycache__/face_enhancer.cpython-310.pyc b/facefusion/processors/frame/modules/__pycache__/face_enhancer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3871bb5a1d382ce1c8f0a58a7561be96b3f2f435 Binary files /dev/null and b/facefusion/processors/frame/modules/__pycache__/face_enhancer.cpython-310.pyc differ diff --git a/facefusion/processors/frame/modules/__pycache__/face_swapper.cpython-310.pyc b/facefusion/processors/frame/modules/__pycache__/face_swapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2765e63cee46a3a4c8fd13f1d9c22ac1cd45053 Binary files /dev/null and b/facefusion/processors/frame/modules/__pycache__/face_swapper.cpython-310.pyc differ diff --git a/facefusion/processors/frame/modules/__pycache__/frame_colorizer.cpython-310.pyc b/facefusion/processors/frame/modules/__pycache__/frame_colorizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee320cca35438b786df7d0c6fd3d4d11762150e4 Binary files /dev/null and b/facefusion/processors/frame/modules/__pycache__/frame_colorizer.cpython-310.pyc differ diff --git a/facefusion/processors/frame/modules/__pycache__/frame_enhancer.cpython-310.pyc b/facefusion/processors/frame/modules/__pycache__/frame_enhancer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..894c92658525d07eae2bd3b6d985516b0b03f07a Binary files /dev/null and b/facefusion/processors/frame/modules/__pycache__/frame_enhancer.cpython-310.pyc differ diff --git a/facefusion/processors/frame/modules/__pycache__/lip_syncer.cpython-310.pyc b/facefusion/processors/frame/modules/__pycache__/lip_syncer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e9af551af14c003eeb33a49892361307f48a73a Binary 
files /dev/null and b/facefusion/processors/frame/modules/__pycache__/lip_syncer.cpython-310.pyc differ diff --git a/facefusion/processors/frame/modules/face_debugger.py b/facefusion/processors/frame/modules/face_debugger.py new file mode 100644 index 0000000000000000000000000000000000000000..ded5c64514e93639136d454a018d0d5646e511f2 --- /dev/null +++ b/facefusion/processors/frame/modules/face_debugger.py @@ -0,0 +1,192 @@ +from typing import Any, List, Literal +from argparse import ArgumentParser +import cv2 +import numpy + +import facefusion.globals +import facefusion.processors.frame.core as frame_processors +from facefusion import config, process_manager, wording +from facefusion.face_analyser import get_one_face, get_many_faces, find_similar_faces, clear_face_analyser +from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser +from facefusion.face_helper import warp_face_by_face_landmark_5, categorize_age, categorize_gender +from facefusion.face_store import get_reference_faces +from facefusion.content_analyser import clear_content_analyser +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, QueuePayload +from facefusion.vision import read_image, read_static_image, write_image +from facefusion.processors.frame.typings import FaceDebuggerInputs +from facefusion.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices + +NAME = __name__.upper() + + +def get_frame_processor() -> None: + pass + + +def clear_frame_processor() -> None: + pass + + +def get_options(key : Literal['model']) -> None: + pass + + +def set_options(key : Literal['model'], value : Any) -> None: + pass + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--face-debugger-items', help = wording.get('help.face_debugger_items').format(choices = ', '.join(frame_processors_choices.face_debugger_items)), default = config.get_str_list('frame_processors.face_debugger_items', 'face-landmark-5/68 face-mask'), choices = frame_processors_choices.face_debugger_items, nargs = '+', metavar = 'FACE_DEBUGGER_ITEMS') + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.face_debugger_items = args.face_debugger_items + + +def pre_check() -> bool: + return True + + +def post_check() -> bool: + return True + + +def pre_process(mode : ProcessMode) -> bool: + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + clear_face_parser() + + +def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + primary_color = (0, 0, 255) + secondary_color = (0, 255, 0) + tertiary_color = (255, 255, 0) + bounding_box = target_face.bounding_box.astype(numpy.int32) + temp_vision_frame = temp_vision_frame.copy() + has_face_landmark_5_fallback = numpy.array_equal(target_face.landmarks.get('5'), target_face.landmarks.get('5/68')) + has_face_landmark_68_fallback = numpy.array_equal(target_face.landmarks.get('68'), target_face.landmarks.get('68/5')) + + if 'bounding-box' in frame_processors_globals.face_debugger_items: + cv2.rectangle(temp_vision_frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], 
bounding_box[3]), primary_color, 2) + if 'face-mask' in frame_processors_globals.face_debugger_items: + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), 'arcface_128_v2', (512, 512)) + inverse_matrix = cv2.invertAffineTransform(affine_matrix) + temp_size = temp_vision_frame.shape[:2][::-1] + crop_mask_list = [] + if 'box' in facefusion.globals.face_mask_types: + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], 0, facefusion.globals.face_mask_padding) + crop_mask_list.append(box_mask) + if 'occlusion' in facefusion.globals.face_mask_types: + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + if 'region' in facefusion.globals.face_mask_types: + region_mask = create_region_mask(crop_vision_frame, facefusion.globals.face_mask_regions) + crop_mask_list.append(region_mask) + crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) + crop_mask = (crop_mask * 255).astype(numpy.uint8) + inverse_vision_frame = cv2.warpAffine(crop_mask, inverse_matrix, temp_size) + inverse_vision_frame = cv2.threshold(inverse_vision_frame, 100, 255, cv2.THRESH_BINARY)[1] + inverse_vision_frame[inverse_vision_frame > 0] = 255 + inverse_contours = cv2.findContours(inverse_vision_frame, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0] + cv2.drawContours(temp_vision_frame, inverse_contours, -1, tertiary_color if has_face_landmark_5_fallback else secondary_color, 2) + if 'face-landmark-5' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('5')): + face_landmark_5 = target_face.landmarks.get('5').astype(numpy.int32) + for index in range(face_landmark_5.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_5[index][0], face_landmark_5[index][1]), 3, primary_color, -1) + if 'face-landmark-5/68' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('5/68')): + face_landmark_5_68 = target_face.landmarks.get('5/68').astype(numpy.int32) + for index in range(face_landmark_5_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_5_68[index][0], face_landmark_5_68[index][1]), 3, tertiary_color if has_face_landmark_5_fallback else secondary_color, -1) + if 'face-landmark-68' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('68')): + face_landmark_68 = target_face.landmarks.get('68').astype(numpy.int32) + for index in range(face_landmark_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, tertiary_color if has_face_landmark_68_fallback else secondary_color, -1) + if 'face-landmark-68/5' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('68')): + face_landmark_68 = target_face.landmarks.get('68/5').astype(numpy.int32) + for index in range(face_landmark_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, primary_color, -1) + if bounding_box[3] - bounding_box[1] > 50 and bounding_box[2] - bounding_box[0] > 50: + top = bounding_box[1] + left = bounding_box[0] - 20 + if 'face-detector-score' in frame_processors_globals.face_debugger_items: + face_score_text = str(round(target_face.scores.get('detector'), 2)) + top = top + 20 + cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) + if 'face-landmarker-score' in frame_processors_globals.face_debugger_items: + face_score_text = 
str(round(target_face.scores.get('landmarker'), 2)) + top = top + 20 + cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, tertiary_color if has_face_landmark_5_fallback else secondary_color, 2) + if 'age' in frame_processors_globals.face_debugger_items: + face_age_text = categorize_age(target_face.age) + top = top + 20 + cv2.putText(temp_vision_frame, face_age_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) + if 'gender' in frame_processors_globals.face_debugger_items: + face_gender_text = categorize_gender(target_face.gender) + top = top + 20 + cv2.putText(temp_vision_frame, face_gender_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) + return temp_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + pass + + +def process_frame(inputs : FaceDebuggerInputs) -> VisionFrame: + reference_faces = inputs.get('reference_faces') + target_vision_frame = inputs.get('target_vision_frame') + + if facefusion.globals.face_selector_mode == 'many': + many_faces = get_many_faces(target_vision_frame) + if many_faces: + for target_face in many_faces: + target_vision_frame = debug_face(target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = debug_face(target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = debug_face(similar_face, target_vision_frame) + return target_vision_frame + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + + for queue_payload in process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) diff --git a/facefusion/processors/frame/modules/face_enhancer.py b/facefusion/processors/frame/modules/face_enhancer.py new file mode 100644 index 0000000000000000000000000000000000000000..ee56da9c3c469ad050aa198811801eb7712d1db2 --- /dev/null +++ b/facefusion/processors/frame/modules/face_enhancer.py @@ -0,0 +1,301 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import cv2 +import numpy +import onnxruntime + +import facefusion.globals +import 
facefusion.processors.frame.core as frame_processors +from facefusion import config, process_manager, logger, wording +from facefusion.face_analyser import get_many_faces, clear_face_analyser, find_similar_faces, get_one_face +from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, clear_face_occluder +from facefusion.face_helper import warp_face_by_face_landmark_5, paste_back +from facefusion.execution import apply_execution_provider_options +from facefusion.content_analyser import clear_content_analyser +from facefusion.face_store import get_reference_faces +from facefusion.normalizer import normalize_output_path +from facefusion.thread_helper import thread_lock, thread_semaphore +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.common_helper import create_metavar +from facefusion.filesystem import is_file, is_image, is_video, resolve_relative_path +from facefusion.download import conditional_download, is_download_done +from facefusion.vision import read_image, read_static_image, write_image +from facefusion.processors.frame.typings import FaceEnhancerInputs +from facefusion.processors.frame import globals as frame_processors_globals +from facefusion.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'codeformer': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/codeformer.onnx', + 'path': resolve_relative_path('../.assets/models/codeformer.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gfpgan_1.2': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gfpgan_1.2.onnx', + 'path': resolve_relative_path('../.assets/models/gfpgan_1.2.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gfpgan_1.3': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gfpgan_1.3.onnx', + 'path': resolve_relative_path('../.assets/models/gfpgan_1.3.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gfpgan_1.4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gfpgan_1.4.onnx', + 'path': resolve_relative_path('../.assets/models/gfpgan_1.4.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gpen_bfr_256': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_256.onnx', + 'path': resolve_relative_path('../.assets/models/gpen_bfr_256.onnx'), + 'template': 'arcface_128_v2', + 'size': (256, 256) + }, + 'gpen_bfr_512': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_512.onnx', + 'path': resolve_relative_path('../.assets/models/gpen_bfr_512.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + }, + 'gpen_bfr_1024': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_1024.onnx', + 'path': resolve_relative_path('../.assets/models/gpen_bfr_1024.onnx'), + 'template': 'ffhq_512', + 'size': (1024, 1024) + }, + 'gpen_bfr_2048': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_2048.onnx', + 'path': resolve_relative_path('../.assets/models/gpen_bfr_2048.onnx'), + 'template': 'ffhq_512', + 'size': (2048, 2048) + }, + 'restoreformer_plus_plus': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/restoreformer_plus_plus.onnx', + 
'path': resolve_relative_path('../.assets/models/restoreformer_plus_plus.onnx'), + 'template': 'ffhq_512', + 'size': (512, 512) + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.face_enhancer_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--face-enhancer-model', help = wording.get('help.face_enhancer_model'), default = config.get_str_value('frame_processors.face_enhancer_model', 'gfpgan_1.4'), choices = frame_processors_choices.face_enhancer_models) + program.add_argument('--face-enhancer-blend', help = wording.get('help.face_enhancer_blend'), type = int, default = config.get_int_value('frame_processors.face_enhancer_blend', '80'), choices = frame_processors_choices.face_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.face_enhancer_blend_range)) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.face_enhancer_model = args.face_enhancer_model + frame_processors_globals.face_enhancer_blend = args.face_enhancer_blend + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def post_check() -> bool: + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + if not is_file(model_path): + logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + 
clear_content_analyser() + clear_face_occluder() + + +def enhance_face(target_face: Face, temp_vision_frame : VisionFrame) -> VisionFrame: + model_template = get_options('model').get('template') + model_size = get_options('model').get('size') + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), model_template, model_size) + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, (0, 0, 0, 0)) + crop_mask_list =\ + [ + box_mask + ] + + if 'occlusion' in facefusion.globals.face_mask_types: + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + crop_vision_frame = prepare_crop_frame(crop_vision_frame) + crop_vision_frame = apply_enhance(crop_vision_frame) + crop_vision_frame = normalize_crop_frame(crop_vision_frame) + crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) + paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + temp_vision_frame = blend_frame(temp_vision_frame, paste_vision_frame) + return temp_vision_frame + + +def apply_enhance(crop_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + frame_processor_inputs = {} + + for frame_processor_input in frame_processor.get_inputs(): + if frame_processor_input.name == 'input': + frame_processor_inputs[frame_processor_input.name] = crop_vision_frame + if frame_processor_input.name == 'weight': + weight = numpy.array([ 1 ]).astype(numpy.double) + frame_processor_inputs[frame_processor_input.name] = weight + with thread_semaphore(): + crop_vision_frame = frame_processor.run(None, frame_processor_inputs)[0][0] + return crop_vision_frame + + +def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = crop_vision_frame[:, :, ::-1] / 255.0 + crop_vision_frame = (crop_vision_frame - 0.5) / 0.5 + crop_vision_frame = numpy.expand_dims(crop_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) + return crop_vision_frame + + +def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = numpy.clip(crop_vision_frame, -1, 1) + crop_vision_frame = (crop_vision_frame + 1) / 2 + crop_vision_frame = crop_vision_frame.transpose(1, 2, 0) + crop_vision_frame = (crop_vision_frame * 255.0).round() + crop_vision_frame = crop_vision_frame.astype(numpy.uint8)[:, :, ::-1] + return crop_vision_frame + + +def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: + face_enhancer_blend = 1 - (frame_processors_globals.face_enhancer_blend / 100) + temp_vision_frame = cv2.addWeighted(temp_vision_frame, face_enhancer_blend, paste_vision_frame, 1 - face_enhancer_blend, 0) + return temp_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + return enhance_face(target_face, temp_vision_frame) + + +def process_frame(inputs : FaceEnhancerInputs) -> VisionFrame: + reference_faces = inputs.get('reference_faces') + target_vision_frame = inputs.get('target_vision_frame') + + if facefusion.globals.face_selector_mode == 'many': + many_faces = get_many_faces(target_vision_frame) + if many_faces: + for target_face in many_faces: + target_vision_frame = enhance_face(target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = 
enhance_face(target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = enhance_face(similar_face, target_vision_frame) + return target_vision_frame + + +def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + + for queue_payload in process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_path : str, target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(None, temp_frame_paths, process_frames) diff --git a/facefusion/processors/frame/modules/face_swapper.py b/facefusion/processors/frame/modules/face_swapper.py new file mode 100644 index 0000000000000000000000000000000000000000..3f9f6a95706545fe71c53d227723e8db1e68543b --- /dev/null +++ b/facefusion/processors/frame/modules/face_swapper.py @@ -0,0 +1,370 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import platform +import numpy +import onnx +import onnxruntime +from onnx import numpy_helper + +import facefusion.globals +import facefusion.processors.frame.core as frame_processors +from facefusion import config, process_manager, logger, wording +from facefusion.execution import apply_execution_provider_options +from facefusion.face_analyser import get_one_face, get_average_face, get_many_faces, find_similar_faces, clear_face_analyser +from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser +from facefusion.face_helper import warp_face_by_face_landmark_5, paste_back +from facefusion.face_store import get_reference_faces +from facefusion.content_analyser import clear_content_analyser +from facefusion.normalizer import normalize_output_path +from facefusion.thread_helper import thread_lock, conditional_thread_semaphore +from facefusion.typing import Face, Embedding, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.filesystem import is_file, is_image, has_image, is_video, filter_image_paths, resolve_relative_path +from facefusion.download import conditional_download, is_download_done +from facefusion.vision import read_image, read_static_image, read_static_images, write_image +from facefusion.processors.frame.typings import FaceSwapperInputs +from facefusion.processors.frame import globals as frame_processors_globals +from facefusion.processors.frame import choices as 
frame_processors_choices + +FRAME_PROCESSOR = None +MODEL_INITIALIZER = None +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'blendswap_256': + { + 'type': 'blendswap', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/blendswap_256.onnx', + 'path': resolve_relative_path('../.assets/models/blendswap_256.onnx'), + 'template': 'ffhq_512', + 'size': (256, 256), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, + 'inswapper_128': + { + 'type': 'inswapper', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128.onnx', + 'path': resolve_relative_path('../.assets/models/inswapper_128.onnx'), + 'template': 'arcface_128_v2', + 'size': (128, 128), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, + 'inswapper_128_fp16': + { + 'type': 'inswapper', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128_fp16.onnx', + 'path': resolve_relative_path('../.assets/models/inswapper_128_fp16.onnx'), + 'template': 'arcface_128_v2', + 'size': (128, 128), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, + 'simswap_256': + { + 'type': 'simswap', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/simswap_256.onnx', + 'path': resolve_relative_path('../.assets/models/simswap_256.onnx'), + 'template': 'arcface_112_v1', + 'size': (256, 256), + 'mean': [ 0.485, 0.456, 0.406 ], + 'standard_deviation': [ 0.229, 0.224, 0.225 ] + }, + 'simswap_512_unofficial': + { + 'type': 'simswap', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/simswap_512_unofficial.onnx', + 'path': resolve_relative_path('../.assets/models/simswap_512_unofficial.onnx'), + 'template': 'arcface_112_v1', + 'size': (512, 512), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, + 'uniface_256': + { + 'type': 'uniface', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/uniface_256.onnx', + 'path': resolve_relative_path('../.assets/models/uniface_256.onnx'), + 'template': 'ffhq_512', + 'size': (256, 256), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_model_initializer() -> Any: + global MODEL_INITIALIZER + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if MODEL_INITIALIZER is None: + model_path = get_options('model').get('path') + model = onnx.load(model_path) + MODEL_INITIALIZER = numpy_helper.to_array(model.graph.initializer[-1]) + return MODEL_INITIALIZER + + +def clear_model_initializer() -> None: + global MODEL_INITIALIZER + + MODEL_INITIALIZER = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.face_swapper_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + 
OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + if platform.system().lower() == 'darwin': + face_swapper_model_fallback = 'inswapper_128' + else: + face_swapper_model_fallback = 'inswapper_128_fp16' + program.add_argument('--face-swapper-model', help = wording.get('help.face_swapper_model'), default = config.get_str_value('frame_processors.face_swapper_model', face_swapper_model_fallback), choices = frame_processors_choices.face_swapper_models) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.face_swapper_model = args.face_swapper_model + if args.face_swapper_model == 'blendswap_256': + facefusion.globals.face_recognizer_model = 'arcface_blendswap' + if args.face_swapper_model == 'inswapper_128' or args.face_swapper_model == 'inswapper_128_fp16': + facefusion.globals.face_recognizer_model = 'arcface_inswapper' + if args.face_swapper_model == 'simswap_256' or args.face_swapper_model == 'simswap_512_unofficial': + facefusion.globals.face_recognizer_model = 'arcface_simswap' + if args.face_swapper_model == 'uniface_256': + facefusion.globals.face_recognizer_model = 'arcface_uniface' + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def post_check() -> bool: + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + if not is_file(model_path): + logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if not has_image(facefusion.globals.source_paths): + logger.error(wording.get('select_image_source') + wording.get('exclamation_mark'), NAME) + return False + source_image_paths = filter_image_paths(facefusion.globals.source_paths) + source_frames = read_static_images(source_image_paths) + for source_frame in source_frames: + if not get_one_face(source_frame): + logger.error(wording.get('no_source_face_detected') + wording.get('exclamation_mark'), NAME) + return False + if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_model_initializer() + clear_frame_processor() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + clear_face_parser() + + +def swap_face(source_face : Face, target_face : Face, temp_vision_frame : 
VisionFrame) -> VisionFrame: + model_template = get_options('model').get('template') + model_size = get_options('model').get('size') + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), model_template, model_size) + crop_mask_list = [] + + if 'box' in facefusion.globals.face_mask_types: + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, facefusion.globals.face_mask_padding) + crop_mask_list.append(box_mask) + if 'occlusion' in facefusion.globals.face_mask_types: + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + crop_vision_frame = prepare_crop_frame(crop_vision_frame) + crop_vision_frame = apply_swap(source_face, crop_vision_frame) + crop_vision_frame = normalize_crop_frame(crop_vision_frame) + if 'region' in facefusion.globals.face_mask_types: + region_mask = create_region_mask(crop_vision_frame, facefusion.globals.face_mask_regions) + crop_mask_list.append(region_mask) + crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) + temp_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + return temp_vision_frame + + +def apply_swap(source_face : Face, crop_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + model_type = get_options('model').get('type') + frame_processor_inputs = {} + + for frame_processor_input in frame_processor.get_inputs(): + if frame_processor_input.name == 'source': + if model_type == 'blendswap' or model_type == 'uniface': + frame_processor_inputs[frame_processor_input.name] = prepare_source_frame(source_face) + else: + frame_processor_inputs[frame_processor_input.name] = prepare_source_embedding(source_face) + if frame_processor_input.name == 'target': + frame_processor_inputs[frame_processor_input.name] = crop_vision_frame + with conditional_thread_semaphore(facefusion.globals.execution_providers): + crop_vision_frame = frame_processor.run(None, frame_processor_inputs)[0][0] + return crop_vision_frame + + +def prepare_source_frame(source_face : Face) -> VisionFrame: + model_type = get_options('model').get('type') + source_vision_frame = read_static_image(facefusion.globals.source_paths[0]) + if model_type == 'blendswap': + source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmarks.get('5/68'), 'arcface_112_v2', (112, 112)) + if model_type == 'uniface': + source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmarks.get('5/68'), 'ffhq_512', (256, 256)) + source_vision_frame = source_vision_frame[:, :, ::-1] / 255.0 + source_vision_frame = source_vision_frame.transpose(2, 0, 1) + source_vision_frame = numpy.expand_dims(source_vision_frame, axis = 0).astype(numpy.float32) + return source_vision_frame + + +def prepare_source_embedding(source_face : Face) -> Embedding: + model_type = get_options('model').get('type') + if model_type == 'inswapper': + model_initializer = get_model_initializer() + source_embedding = source_face.embedding.reshape((1, -1)) + source_embedding = numpy.dot(source_embedding, model_initializer) / numpy.linalg.norm(source_embedding) + else: + source_embedding = source_face.normed_embedding.reshape(1, -1) + return source_embedding + + +def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + model_mean = get_options('model').get('mean') + model_standard_deviation = get_options('model').get('standard_deviation') + 
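+	# convert the crop from BGR to RGB, scale it to [0, 1] and normalize it with the model mean and standard deviation before shaping it into a NCHW batch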
crop_vision_frame = crop_vision_frame[:, :, ::-1] / 255.0 + crop_vision_frame = (crop_vision_frame - model_mean) / model_standard_deviation + crop_vision_frame = crop_vision_frame.transpose(2, 0, 1) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0).astype(numpy.float32) + return crop_vision_frame + + +def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = crop_vision_frame.transpose(1, 2, 0) + crop_vision_frame = (crop_vision_frame * 255.0).round() + crop_vision_frame = crop_vision_frame[:, :, ::-1] + return crop_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + return swap_face(source_face, target_face, temp_vision_frame) + + +def process_frame(inputs : FaceSwapperInputs) -> VisionFrame: + reference_faces = inputs.get('reference_faces') + source_face = inputs.get('source_face') + target_vision_frame = inputs.get('target_vision_frame') + + if facefusion.globals.face_selector_mode == 'many': + many_faces = get_many_faces(target_vision_frame) + if many_faces: + for target_face in many_faces: + target_vision_frame = swap_face(source_face, target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = swap_face(source_face, target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = swap_face(source_face, similar_face, target_vision_frame) + return target_vision_frame + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_frames = read_static_images(source_paths) + source_face = get_average_face(source_frames) + + for queue_payload in process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_frames = read_static_images(source_paths) + source_face = get_average_face(source_frames) + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) diff --git a/facefusion/processors/frame/modules/frame_colorizer.py b/facefusion/processors/frame/modules/frame_colorizer.py new file mode 100644 index 0000000000000000000000000000000000000000..3c8be9cc119c8ec3b41e3bc7c993c5c80cf3cc4d --- /dev/null +++ 
b/facefusion/processors/frame/modules/frame_colorizer.py @@ -0,0 +1,241 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import cv2 +import numpy +import onnxruntime + +import facefusion.globals +import facefusion.processors.frame.core as frame_processors +from facefusion import config, process_manager, logger, wording +from facefusion.face_analyser import clear_face_analyser +from facefusion.content_analyser import clear_content_analyser +from facefusion.execution import apply_execution_provider_options +from facefusion.normalizer import normalize_output_path +from facefusion.thread_helper import thread_lock, thread_semaphore +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.common_helper import create_metavar +from facefusion.filesystem import is_file, resolve_relative_path, is_image, is_video +from facefusion.download import conditional_download, is_download_done +from facefusion.vision import read_image, read_static_image, write_image, unpack_resolution +from facefusion.processors.frame.typings import FrameColorizerInputs +from facefusion.processors.frame import globals as frame_processors_globals +from facefusion.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'ddcolor': + { + 'type': 'ddcolor', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ddcolor.onnx', + 'path': resolve_relative_path('../.assets/models/ddcolor.onnx') + }, + 'ddcolor_artistic': + { + 'type': 'ddcolor', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ddcolor_artistic.onnx', + 'path': resolve_relative_path('../.assets/models/ddcolor_artistic.onnx') + }, + 'deoldify': + { + 'type': 'deoldify', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/deoldify.onnx', + 'path': resolve_relative_path('../.assets/models/deoldify.onnx') + }, + 'deoldify_artistic': + { + 'type': 'deoldify', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/deoldify_artistic.onnx', + 'path': resolve_relative_path('../.assets/models/deoldify_artistic.onnx') + }, + 'deoldify_stable': + { + 'type': 'deoldify', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/deoldify_stable.onnx', + 'path': resolve_relative_path('../.assets/models/deoldify_stable.onnx') + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.frame_colorizer_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--frame-colorizer-model', help = wording.get('help.frame_colorizer_model'), 
default = config.get_str_value('frame_processors.frame_colorizer_model', 'ddcolor'), choices = frame_processors_choices.frame_colorizer_models) + program.add_argument('--frame-colorizer-blend', help = wording.get('help.frame_colorizer_blend'), type = int, default = config.get_int_value('frame_processors.frame_colorizer_blend', '100'), choices = frame_processors_choices.frame_colorizer_blend_range, metavar = create_metavar(frame_processors_choices.frame_colorizer_blend_range)) + program.add_argument('--frame-colorizer-size', help = wording.get('help.frame_colorizer_size'), type = str, default = config.get_str_value('frame_processors.frame_colorizer_size', '256x256'), choices = frame_processors_choices.frame_colorizer_sizes) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.frame_colorizer_model = args.frame_colorizer_model + frame_processors_globals.frame_colorizer_blend = args.frame_colorizer_blend + frame_processors_globals.frame_colorizer_size = args.frame_colorizer_size + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def post_check() -> bool: + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + if not is_file(model_path): + logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + + +def colorize_frame(temp_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + prepare_vision_frame = prepare_temp_frame(temp_vision_frame) + with thread_semaphore(): + color_vision_frame = frame_processor.run(None, + { + frame_processor.get_inputs()[0].name: prepare_vision_frame + })[0][0] + color_vision_frame = merge_color_frame(temp_vision_frame, color_vision_frame) + color_vision_frame = blend_frame(temp_vision_frame, color_vision_frame) + return color_vision_frame + + +def prepare_temp_frame(temp_vision_frame : VisionFrame) -> VisionFrame: + model_size = unpack_resolution(frame_processors_globals.frame_colorizer_size) + model_type = get_options('model').get('type') + temp_vision_frame = cv2.cvtColor(temp_vision_frame, 
cv2.COLOR_BGR2GRAY) + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_GRAY2RGB) + if model_type == 'ddcolor': + temp_vision_frame = (temp_vision_frame / 255.0).astype(numpy.float32) + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_RGB2LAB)[:, :, :1] + temp_vision_frame = numpy.concatenate((temp_vision_frame, numpy.zeros_like(temp_vision_frame), numpy.zeros_like(temp_vision_frame)), axis = -1) + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_LAB2RGB) + temp_vision_frame = cv2.resize(temp_vision_frame, model_size) + temp_vision_frame = temp_vision_frame.transpose((2, 0, 1)) + temp_vision_frame = numpy.expand_dims(temp_vision_frame, axis = 0).astype(numpy.float32) + return temp_vision_frame + + +def merge_color_frame(temp_vision_frame : VisionFrame, color_vision_frame : VisionFrame) -> VisionFrame: + model_type = get_options('model').get('type') + color_vision_frame = color_vision_frame.transpose(1, 2, 0) + color_vision_frame = cv2.resize(color_vision_frame, (temp_vision_frame.shape[1], temp_vision_frame.shape[0])) + if model_type == 'ddcolor': + temp_vision_frame = (temp_vision_frame / 255.0).astype(numpy.float32) + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_BGR2LAB)[:, :, :1] + color_vision_frame = numpy.concatenate((temp_vision_frame, color_vision_frame), axis = -1) + color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_LAB2BGR) + color_vision_frame = (color_vision_frame * 255.0).round().astype(numpy.uint8) + if model_type == 'deoldify': + temp_blue_channel, _, _ = cv2.split(temp_vision_frame) + color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_BGR2RGB).astype(numpy.uint8) + color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_BGR2LAB) + _, color_green_channel, color_red_channel = cv2.split(color_vision_frame) + color_vision_frame = cv2.merge((temp_blue_channel, color_green_channel, color_red_channel)) + color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_LAB2BGR) + return color_vision_frame + + +def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: + frame_colorizer_blend = 1 - (frame_processors_globals.frame_colorizer_blend / 100) + temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_colorizer_blend, paste_vision_frame, 1 - frame_colorizer_blend, 0) + return temp_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + pass + + +def process_frame(inputs : FrameColorizerInputs) -> VisionFrame: + target_vision_frame = inputs.get('target_vision_frame') + return colorize_frame(target_vision_frame) + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + for queue_payload in process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + 
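+	# the frame colorizer works without source media, so no source paths are forwarded to the frame processor core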
frame_processors.multi_process_frames(None, temp_frame_paths, process_frames) diff --git a/facefusion/processors/frame/modules/frame_enhancer.py b/facefusion/processors/frame/modules/frame_enhancer.py new file mode 100644 index 0000000000000000000000000000000000000000..91375a5d2f9d37d2649d2d41fe12bb18cbde54ed --- /dev/null +++ b/facefusion/processors/frame/modules/frame_enhancer.py @@ -0,0 +1,249 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import cv2 +import numpy +import onnxruntime + +import facefusion.globals +import facefusion.processors.frame.core as frame_processors +from facefusion import config, process_manager, logger, wording +from facefusion.face_analyser import clear_face_analyser +from facefusion.content_analyser import clear_content_analyser +from facefusion.execution import apply_execution_provider_options +from facefusion.normalizer import normalize_output_path +from facefusion.thread_helper import thread_lock, conditional_thread_semaphore +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.common_helper import create_metavar +from facefusion.filesystem import is_file, resolve_relative_path, is_image, is_video +from facefusion.download import conditional_download, is_download_done +from facefusion.vision import read_image, read_static_image, write_image, merge_tile_frames, create_tile_frames +from facefusion.processors.frame.typings import FrameEnhancerInputs +from facefusion.processors.frame import globals as frame_processors_globals +from facefusion.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'lsdir_x4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/lsdir_x4.onnx', + 'path': resolve_relative_path('../.assets/models/lsdir_x4.onnx'), + 'size': (128, 8, 2), + 'scale': 4 + }, + 'nomos8k_sc_x4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/nomos8k_sc_x4.onnx', + 'path': resolve_relative_path('../.assets/models/nomos8k_sc_x4.onnx'), + 'size': (128, 8, 2), + 'scale': 4 + }, + 'real_esrgan_x2': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x2.onnx', + 'path': resolve_relative_path('../.assets/models/real_esrgan_x2.onnx'), + 'size': (128, 8, 2), + 'scale': 2 + }, + 'real_esrgan_x2_fp16': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x2_fp16.onnx', + 'path': resolve_relative_path('../.assets/models/real_esrgan_x2_fp16.onnx'), + 'size': (128, 8, 2), + 'scale': 2 + }, + 'real_esrgan_x4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x4.onnx', + 'path': resolve_relative_path('../.assets/models/real_esrgan_x4.onnx'), + 'size': (128, 8, 2), + 'scale': 4 + }, + 'real_esrgan_x4_fp16': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x4_fp16.onnx', + 'path': resolve_relative_path('../.assets/models/real_esrgan_x4_fp16.onnx'), + 'size': (128, 8, 2), + 'scale': 4 + }, + 'real_hatgan_x4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_hatgan_x4.onnx', + 'path': resolve_relative_path('../.assets/models/real_hatgan_x4.onnx'), + 'size': (256, 8, 2), + 'scale': 4 + }, + 'span_kendata_x4': + { + 'url': 
'https://github.com/facefusion/facefusion-assets/releases/download/models/span_kendata_x4.onnx', + 'path': resolve_relative_path('../.assets/models/span_kendata_x4.onnx'), + 'size': (128, 8, 2), + 'scale': 4 + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.frame_enhancer_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--frame-enhancer-model', help = wording.get('help.frame_enhancer_model'), default = config.get_str_value('frame_processors.frame_enhancer_model', 'span_kendata_x4'), choices = frame_processors_choices.frame_enhancer_models) + program.add_argument('--frame-enhancer-blend', help = wording.get('help.frame_enhancer_blend'), type = int, default = config.get_int_value('frame_processors.frame_enhancer_blend', '80'), choices = frame_processors_choices.frame_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.frame_enhancer_blend_range)) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.frame_enhancer_model = args.frame_enhancer_model + frame_processors_globals.frame_enhancer_blend = args.frame_enhancer_blend + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def post_check() -> bool: + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + if not is_file(model_path): + logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + 
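+		# release the cached ONNX session under the strict and moderate video memory strategies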
clear_frame_processor() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + + +def enhance_frame(temp_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + size = get_options('model').get('size') + scale = get_options('model').get('scale') + temp_height, temp_width = temp_vision_frame.shape[:2] + tile_vision_frames, pad_width, pad_height = create_tile_frames(temp_vision_frame, size) + + for index, tile_vision_frame in enumerate(tile_vision_frames): + with conditional_thread_semaphore(facefusion.globals.execution_providers): + tile_vision_frame = frame_processor.run(None, + { + frame_processor.get_inputs()[0].name : prepare_tile_frame(tile_vision_frame) + })[0] + tile_vision_frames[index] = normalize_tile_frame(tile_vision_frame) + merge_vision_frame = merge_tile_frames(tile_vision_frames, temp_width * scale, temp_height * scale, pad_width * scale, pad_height * scale, (size[0] * scale, size[1] * scale, size[2] * scale)) + temp_vision_frame = blend_frame(temp_vision_frame, merge_vision_frame) + return temp_vision_frame + + +def prepare_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: + vision_tile_frame = numpy.expand_dims(vision_tile_frame[:, :, ::-1], axis = 0) + vision_tile_frame = vision_tile_frame.transpose(0, 3, 1, 2) + vision_tile_frame = vision_tile_frame.astype(numpy.float32) / 255 + return vision_tile_frame + + +def normalize_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: + vision_tile_frame = vision_tile_frame.transpose(0, 2, 3, 1).squeeze(0) * 255 + vision_tile_frame = vision_tile_frame.clip(0, 255).astype(numpy.uint8)[:, :, ::-1] + return vision_tile_frame + + +def blend_frame(temp_vision_frame : VisionFrame, merge_vision_frame : VisionFrame) -> VisionFrame: + frame_enhancer_blend = 1 - (frame_processors_globals.frame_enhancer_blend / 100) + temp_vision_frame = cv2.resize(temp_vision_frame, (merge_vision_frame.shape[1], merge_vision_frame.shape[0])) + temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_enhancer_blend, merge_vision_frame, 1 - frame_enhancer_blend, 0) + return temp_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + pass + + +def process_frame(inputs : FrameEnhancerInputs) -> VisionFrame: + target_vision_frame = inputs.get('target_vision_frame') + return enhance_frame(target_vision_frame) + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + for queue_payload in process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(None, temp_frame_paths, process_frames) diff --git a/facefusion/processors/frame/modules/lip_syncer.py b/facefusion/processors/frame/modules/lip_syncer.py new file mode 100644 index 
0000000000000000000000000000000000000000..410a38b1c77a55e1fcce3c29aed309ec4d204cab --- /dev/null +++ b/facefusion/processors/frame/modules/lip_syncer.py @@ -0,0 +1,260 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import cv2 +import numpy +import onnxruntime + +import facefusion.globals +import facefusion.processors.frame.core as frame_processors +from facefusion import config, process_manager, logger, wording +from facefusion.execution import apply_execution_provider_options +from facefusion.face_analyser import get_one_face, get_many_faces, find_similar_faces, clear_face_analyser +from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_mouth_mask, clear_face_occluder, clear_face_parser +from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_bounding_box, paste_back, create_bounding_box_from_face_landmark_68 +from facefusion.face_store import get_reference_faces +from facefusion.content_analyser import clear_content_analyser +from facefusion.normalizer import normalize_output_path +from facefusion.thread_helper import thread_lock, conditional_thread_semaphore +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, AudioFrame, QueuePayload +from facefusion.filesystem import is_file, has_audio, resolve_relative_path +from facefusion.download import conditional_download, is_download_done +from facefusion.audio import read_static_voice, get_voice_frame, create_empty_audio_frame +from facefusion.filesystem import is_image, is_video, filter_audio_paths +from facefusion.common_helper import get_first +from facefusion.vision import read_image, read_static_image, write_image, restrict_video_fps +from facefusion.processors.frame.typings import LipSyncerInputs +from facefusion.voice_extractor import clear_voice_extractor +from facefusion.processors.frame import globals as frame_processors_globals +from facefusion.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'wav2lip_gan': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/wav2lip_gan.onnx', + 'path': resolve_relative_path('../.assets/models/wav2lip_gan.onnx') + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.lip_syncer_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--lip-syncer-model', help = wording.get('help.lip_syncer_model'), default = config.get_str_value('frame_processors.lip_syncer_model', 'wav2lip_gan'), choices = frame_processors_choices.lip_syncer_models) + + +def apply_args(program : ArgumentParser) -> None: + args = 
program.parse_args() + frame_processors_globals.lip_syncer_model = args.lip_syncer_model + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def post_check() -> bool: + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + if not is_file(model_path): + logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if not has_audio(facefusion.globals.source_paths): + logger.error(wording.get('select_audio_source') + wording.get('exclamation_mark'), NAME) + return False + if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + read_static_voice.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + clear_face_parser() + clear_voice_extractor() + + +def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + crop_mask_list = [] + temp_audio_frame = prepare_audio_frame(temp_audio_frame) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), 'ffhq_512', (512, 512)) + face_landmark_68 = cv2.transform(target_face.landmarks.get('68').reshape(1, -1, 2), affine_matrix).reshape(-1, 2) + bounding_box = create_bounding_box_from_face_landmark_68(face_landmark_68) + bounding_box[1] -= numpy.abs(bounding_box[3] - bounding_box[1]) * 0.125 + mouth_mask = create_mouth_mask(face_landmark_68) + crop_mask_list.append(mouth_mask) + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, facefusion.globals.face_mask_padding) + crop_mask_list.append(box_mask) + + if 'occlusion' in facefusion.globals.face_mask_types: + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + close_vision_frame, close_matrix = warp_face_by_bounding_box(crop_vision_frame, bounding_box, (96, 96)) + close_vision_frame = prepare_crop_frame(close_vision_frame) + with conditional_thread_semaphore(facefusion.globals.execution_providers): + close_vision_frame = frame_processor.run(None, + { + 'source': temp_audio_frame, + 'target': close_vision_frame + })[0] + crop_vision_frame = 
normalize_crop_frame(close_vision_frame) + crop_vision_frame = cv2.warpAffine(crop_vision_frame, cv2.invertAffineTransform(close_matrix), (512, 512), borderMode = cv2.BORDER_REPLICATE) + crop_mask = numpy.minimum.reduce(crop_mask_list) + paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + return paste_vision_frame + + +def prepare_audio_frame(temp_audio_frame : AudioFrame) -> AudioFrame: + temp_audio_frame = numpy.maximum(numpy.exp(-5 * numpy.log(10)), temp_audio_frame) + temp_audio_frame = numpy.log10(temp_audio_frame) * 1.6 + 3.2 + temp_audio_frame = temp_audio_frame.clip(-4, 4).astype(numpy.float32) + temp_audio_frame = numpy.expand_dims(temp_audio_frame, axis = (0, 1)) + return temp_audio_frame + + +def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) + prepare_vision_frame = crop_vision_frame.copy() + prepare_vision_frame[:, 48:] = 0 + crop_vision_frame = numpy.concatenate((prepare_vision_frame, crop_vision_frame), axis = 3) + crop_vision_frame = crop_vision_frame.transpose(0, 3, 1, 2).astype('float32') / 255.0 + return crop_vision_frame + + +def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = crop_vision_frame[0].transpose(1, 2, 0) + crop_vision_frame = crop_vision_frame.clip(0, 1) * 255 + crop_vision_frame = crop_vision_frame.astype(numpy.uint8) + return crop_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + pass + + +def process_frame(inputs : LipSyncerInputs) -> VisionFrame: + reference_faces = inputs.get('reference_faces') + source_audio_frame = inputs.get('source_audio_frame') + target_vision_frame = inputs.get('target_vision_frame') + + if facefusion.globals.face_selector_mode == 'many': + many_faces = get_many_faces(target_vision_frame) + if many_faces: + for target_face in many_faces: + target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) + if facefusion.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) + if facefusion.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = sync_lip(similar_face, source_audio_frame, target_vision_frame) + return target_vision_frame + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_audio_path = get_first(filter_audio_paths(source_paths)) + temp_video_fps = restrict_video_fps(facefusion.globals.target_path, facefusion.globals.output_video_fps) + + for queue_payload in process_manager.manage(queue_payloads): + frame_number = queue_payload['frame_number'] + target_vision_path = queue_payload['frame_path'] + source_audio_frame = get_voice_frame(source_audio_path, temp_video_fps, frame_number) + if not numpy.any(source_audio_frame): + source_audio_frame = create_empty_audio_frame() + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_audio_frame': 
source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_audio_frame = create_empty_audio_frame() + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_audio_frame': source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + source_audio_paths = filter_audio_paths(facefusion.globals.source_paths) + temp_video_fps = restrict_video_fps(facefusion.globals.target_path, facefusion.globals.output_video_fps) + for source_audio_path in source_audio_paths: + read_static_voice(source_audio_path, temp_video_fps) + frame_processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) diff --git a/facefusion/processors/frame/typings.py b/facefusion/processors/frame/typings.py new file mode 100644 index 0000000000000000000000000000000000000000..05729c5db731c12ed37963254e0db38652112bee --- /dev/null +++ b/facefusion/processors/frame/typings.py @@ -0,0 +1,41 @@ +from typing import Literal, TypedDict + +from facefusion.typing import Face, FaceSet, AudioFrame, VisionFrame + +FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender'] +FaceEnhancerModel = Literal['codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'gpen_bfr_1024', 'gpen_bfr_2048', 'restoreformer_plus_plus'] +FaceSwapperModel = Literal['blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256'] +FrameColorizerModel = Literal['ddcolor', 'ddcolor_artistic', 'deoldify', 'deoldify_artistic', 'deoldify_stable'] +FrameEnhancerModel = Literal['lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x2', 'real_esrgan_x2_fp16', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'real_hatgan_x4', 'span_kendata_x4'] +LipSyncerModel = Literal['wav2lip_gan'] + +FaceDebuggerInputs = TypedDict('FaceDebuggerInputs', +{ + 'reference_faces' : FaceSet, + 'target_vision_frame' : VisionFrame +}) +FaceEnhancerInputs = TypedDict('FaceEnhancerInputs', +{ + 'reference_faces' : FaceSet, + 'target_vision_frame' : VisionFrame +}) +FaceSwapperInputs = TypedDict('FaceSwapperInputs', +{ + 'reference_faces' : FaceSet, + 'source_face' : Face, + 'target_vision_frame' : VisionFrame +}) +FrameColorizerInputs = TypedDict('FrameColorizerInputs', +{ + 'target_vision_frame' : VisionFrame +}) +FrameEnhancerInputs = TypedDict('FrameEnhancerInputs', +{ + 'target_vision_frame' : VisionFrame +}) +LipSyncerInputs = TypedDict('LipSyncerInputs', +{ + 'reference_faces' : FaceSet, + 'source_audio_frame' : AudioFrame, + 'target_vision_frame' : VisionFrame +}) diff --git a/facefusion/statistics.py b/facefusion/statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..f67c32b9039586ed4f05bcaf917ff94d7c9b81e9 --- /dev/null +++ b/facefusion/statistics.py @@ -0,0 +1,51 @@ +from typing import Any, Dict +import numpy + +import facefusion.globals +from facefusion.face_store import FACE_STORE +from facefusion.typing import 
FaceSet +from facefusion import logger + + +def create_statistics(static_faces : FaceSet) -> Dict[str, Any]: + face_detector_score_list = [] + face_landmarker_score_list = [] + statistics =\ + { + 'min_face_detector_score': 0, + 'min_face_landmarker_score': 0, + 'max_face_detector_score': 0, + 'max_face_landmarker_score': 0, + 'average_face_detector_score': 0, + 'average_face_landmarker_score': 0, + 'total_face_landmark_5_fallbacks': 0, + 'total_frames_with_faces': 0, + 'total_faces': 0 + } + + for faces in static_faces.values(): + statistics['total_frames_with_faces'] = statistics.get('total_frames_with_faces') + 1 + for face in faces: + statistics['total_faces'] = statistics.get('total_faces') + 1 + face_detector_score_list.append(face.scores.get('detector')) + face_landmarker_score_list.append(face.scores.get('landmarker')) + if numpy.array_equal(face.landmarks.get('5'), face.landmarks.get('5/68')): + statistics['total_face_landmark_5_fallbacks'] = statistics.get('total_face_landmark_5_fallbacks') + 1 + + if face_detector_score_list: + statistics['min_face_detector_score'] = round(min(face_detector_score_list), 2) + statistics['max_face_detector_score'] = round(max(face_detector_score_list), 2) + statistics['average_face_detector_score'] = round(numpy.mean(face_detector_score_list), 2) + if face_landmarker_score_list: + statistics['min_face_landmarker_score'] = round(min(face_landmarker_score_list), 2) + statistics['max_face_landmarker_score'] = round(max(face_landmarker_score_list), 2) + statistics['average_face_landmarker_score'] = round(numpy.mean(face_landmarker_score_list), 2) + return statistics + + +def conditional_log_statistics() -> None: + if facefusion.globals.log_level == 'debug': + statistics = create_statistics(FACE_STORE.get('static_faces')) + + for name, value in statistics.items(): + logger.debug(str(name) + ': ' + str(value), __name__.upper()) diff --git a/facefusion/thread_helper.py b/facefusion/thread_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..c08c6f17724a0bc66227ca9352e4e0c67d8a2b14 --- /dev/null +++ b/facefusion/thread_helper.py @@ -0,0 +1,21 @@ +from typing import List, Union, ContextManager +import threading +from contextlib import nullcontext + +THREAD_LOCK : threading.Lock = threading.Lock() +THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore() +NULL_CONTEXT : ContextManager[None] = nullcontext() + + +def thread_lock() -> threading.Lock: + return THREAD_LOCK + + +def thread_semaphore() -> threading.Semaphore: + return THREAD_SEMAPHORE + + +def conditional_thread_semaphore(execution_providers : List[str]) -> Union[threading.Semaphore, ContextManager[None]]: + if 'DmlExecutionProvider' in execution_providers: + return THREAD_SEMAPHORE + return NULL_CONTEXT diff --git a/facefusion/typing.py b/facefusion/typing.py new file mode 100644 index 0000000000000000000000000000000000000000..dbb33dc49043b91e3793ebeee2fefc02bf10758e --- /dev/null +++ b/facefusion/typing.py @@ -0,0 +1,122 @@ +from typing import Any, Literal, Callable, List, Tuple, Dict, TypedDict +from collections import namedtuple +import numpy + +BoundingBox = numpy.ndarray[Any, Any] +FaceLandmark5 = numpy.ndarray[Any, Any] +FaceLandmark68 = numpy.ndarray[Any, Any] +FaceLandmarkSet = TypedDict('FaceLandmarkSet', +{ + '5' : FaceLandmark5, # type: ignore[valid-type] + '5/68' : FaceLandmark5, # type: ignore[valid-type] + '68' : FaceLandmark68, # type: ignore[valid-type] + '68/5' : FaceLandmark68 # type: ignore[valid-type] +}) +Score = float +FaceScoreSet = 
TypedDict('FaceScoreSet', +{ + 'detector' : Score, + 'landmarker' : Score +}) +Embedding = numpy.ndarray[Any, Any] +Face = namedtuple('Face', +[ + 'bounding_box', + 'landmarks', + 'scores', + 'embedding', + 'normed_embedding', + 'gender', + 'age' +]) +FaceSet = Dict[str, List[Face]] +FaceStore = TypedDict('FaceStore', +{ + 'static_faces' : FaceSet, + 'reference_faces': FaceSet +}) + +VisionFrame = numpy.ndarray[Any, Any] +Mask = numpy.ndarray[Any, Any] +Matrix = numpy.ndarray[Any, Any] +Translation = numpy.ndarray[Any, Any] + +AudioBuffer = bytes +Audio = numpy.ndarray[Any, Any] +AudioChunk = numpy.ndarray[Any, Any] +AudioFrame = numpy.ndarray[Any, Any] +Spectrogram = numpy.ndarray[Any, Any] +MelFilterBank = numpy.ndarray[Any, Any] + +Fps = float +Padding = Tuple[int, int, int, int] +Resolution = Tuple[int, int] + +ProcessState = Literal['checking', 'processing', 'stopping', 'pending'] +QueuePayload = TypedDict('QueuePayload', +{ + 'frame_number' : int, + 'frame_path' : str +}) +UpdateProgress = Callable[[int], None] +ProcessFrames = Callable[[List[str], List[QueuePayload], UpdateProgress], None] + +WarpTemplate = Literal['arcface_112_v1', 'arcface_112_v2', 'arcface_128_v2', 'ffhq_512'] +WarpTemplateSet = Dict[WarpTemplate, numpy.ndarray[Any, Any]] +ProcessMode = Literal['output', 'preview', 'stream'] + +LogLevel = Literal['error', 'warn', 'info', 'debug'] +VideoMemoryStrategy = Literal['strict', 'moderate', 'tolerant'] +FaceSelectorMode = Literal['many', 'one', 'reference'] +FaceAnalyserOrder = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best'] +FaceAnalyserAge = Literal['child', 'teen', 'adult', 'senior'] +FaceAnalyserGender = Literal['female', 'male'] +FaceDetectorModel = Literal['many', 'retinaface', 'scrfd', 'yoloface', 'yunet'] +FaceDetectorTweak = Literal['low-luminance', 'high-luminance'] +FaceRecognizerModel = Literal['arcface_blendswap', 'arcface_inswapper', 'arcface_simswap', 'arcface_uniface'] +FaceMaskType = Literal['box', 'occlusion', 'region'] +FaceMaskRegion = Literal['skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip'] +TempFrameFormat = Literal['jpg', 'png', 'bmp'] +OutputVideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf'] +OutputVideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow'] + +ModelValue = Dict[str, Any] +ModelSet = Dict[str, ModelValue] +OptionsWithModel = TypedDict('OptionsWithModel', +{ + 'model' : ModelValue +}) + +ValueAndUnit = TypedDict('ValueAndUnit', +{ + 'value' : str, + 'unit' : str +}) +ExecutionDeviceFramework = TypedDict('ExecutionDeviceFramework', +{ + 'name' : str, + 'version' : str +}) +ExecutionDeviceProduct = TypedDict('ExecutionDeviceProduct', +{ + 'vendor' : str, + 'name' : str +}) +ExecutionDeviceVideoMemory = TypedDict('ExecutionDeviceVideoMemory', +{ + 'total' : ValueAndUnit, + 'free' : ValueAndUnit +}) +ExecutionDeviceUtilization = TypedDict('ExecutionDeviceUtilization', +{ + 'gpu' : ValueAndUnit, + 'memory' : ValueAndUnit +}) +ExecutionDevice = TypedDict('ExecutionDevice', +{ + 'driver_version' : str, + 'framework' : ExecutionDeviceFramework, + 'product' : ExecutionDeviceProduct, + 'video_memory' : ExecutionDeviceVideoMemory, + 'utilization' : ExecutionDeviceUtilization +}) diff --git a/facefusion/uis/__init__.py b/facefusion/uis/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/facefusion/uis/__pycache__/__init__.cpython-310.pyc b/facefusion/uis/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73ba2f79a3673132974819ee09c1c7cfde779073 Binary files /dev/null and b/facefusion/uis/__pycache__/__init__.cpython-310.pyc differ diff --git a/facefusion/uis/__pycache__/choices.cpython-310.pyc b/facefusion/uis/__pycache__/choices.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44214cdbee20b72f46fc02648e0355a9fb0f5534 Binary files /dev/null and b/facefusion/uis/__pycache__/choices.cpython-310.pyc differ diff --git a/facefusion/uis/__pycache__/core.cpython-310.pyc b/facefusion/uis/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9abb3972bfab327868e4e321268bda5b4e50e79a Binary files /dev/null and b/facefusion/uis/__pycache__/core.cpython-310.pyc differ diff --git a/facefusion/uis/__pycache__/overrides.cpython-310.pyc b/facefusion/uis/__pycache__/overrides.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7dfca305a88edeaeb1ae6fdc0509abbfce8861c9 Binary files /dev/null and b/facefusion/uis/__pycache__/overrides.cpython-310.pyc differ diff --git a/facefusion/uis/__pycache__/typing.cpython-310.pyc b/facefusion/uis/__pycache__/typing.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4316377edf57bf0447bc5990b2cd57c2f91216a8 Binary files /dev/null and b/facefusion/uis/__pycache__/typing.cpython-310.pyc differ diff --git a/facefusion/uis/assets/fixes.css b/facefusion/uis/assets/fixes.css new file mode 100644 index 0000000000000000000000000000000000000000..f65a7cfd3e3e34111a09a9100c6714ff49558615 --- /dev/null +++ b/facefusion/uis/assets/fixes.css @@ -0,0 +1,7 @@ +:root:root:root button:not([class]) +{ + border-radius: 0.375rem; + float: left; + overflow: hidden; + width: 100%; +} diff --git a/facefusion/uis/assets/overrides.css b/facefusion/uis/assets/overrides.css new file mode 100644 index 0000000000000000000000000000000000000000..744ed3ba342641a20ec585cae438ccfbb8b21271 --- /dev/null +++ b/facefusion/uis/assets/overrides.css @@ -0,0 +1,58 @@ +:root:root:root input[type="number"] +{ + max-width: 6rem; +} + +:root:root:root [type="checkbox"], +:root:root:root [type="radio"] +{ + border-radius: 50%; + height: 1.125rem; + width: 1.125rem; +} + +:root:root:root input[type="range"] +{ + height: 0.5rem; +} + +:root:root:root input[type="range"]::-moz-range-thumb, +:root:root:root input[type="range"]::-webkit-slider-thumb +{ + background: var(--neutral-300); + border: unset; + border-radius: 50%; + height: 1.125rem; + width: 1.125rem; +} + +:root:root:root input[type="range"]::-webkit-slider-thumb +{ + margin-top: 0.375rem; +} + +:root:root:root .grid-wrap.fixed-height +{ + min-height: unset; +} + +:root:root:root .grid-container +{ + grid-auto-rows: minmax(5em, 1fr); + grid-template-columns: repeat(var(--grid-cols), minmax(5em, 1fr)); + grid-template-rows: repeat(var(--grid-rows), minmax(5em, 1fr)); +} + +:root:root:root .tab-nav > button +{ + border: unset; + border-bottom: 0.125rem solid transparent; + font-size: 1.125em; + margin: 0.5rem 1rem; + padding: 0; +} + +:root:root:root .tab-nav > button.selected +{ + border-bottom: 0.125rem solid; +} diff --git a/facefusion/uis/choices.py b/facefusion/uis/choices.py new file mode 100644 index 
0000000000000000000000000000000000000000..cae0256939ade57cb063066e81bb7ea96b7ab0f8 --- /dev/null +++ b/facefusion/uis/choices.py @@ -0,0 +1,7 @@ +from typing import List + +from facefusion.uis.typing import WebcamMode + +common_options : List[str] = [ 'keep-temp', 'skip-audio', 'skip-download' ] +webcam_modes : List[WebcamMode] = [ 'inline', 'udp', 'v4l2' ] +webcam_resolutions : List[str] = [ '320x240', '640x480', '800x600', '1024x768', '1280x720', '1280x960', '1920x1080', '2560x1440', '3840x2160' ] diff --git a/facefusion/uis/components/__init__.py b/facefusion/uis/components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/facefusion/uis/components/__pycache__/__init__.cpython-310.pyc b/facefusion/uis/components/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ddf2c5a374bfc51b347c2ee8fa19fae6e416248 Binary files /dev/null and b/facefusion/uis/components/__pycache__/__init__.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/about.cpython-310.pyc b/facefusion/uis/components/__pycache__/about.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e537d9b6b786f84e47dca5a92a621b8d11c8190 Binary files /dev/null and b/facefusion/uis/components/__pycache__/about.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/common_options.cpython-310.pyc b/facefusion/uis/components/__pycache__/common_options.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c042d8dcdb7bee8bebd5de087813463dfa7aa81 Binary files /dev/null and b/facefusion/uis/components/__pycache__/common_options.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/execution.cpython-310.pyc b/facefusion/uis/components/__pycache__/execution.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c1d20f7c581ce843622fe720225db9b8b2e90cb Binary files /dev/null and b/facefusion/uis/components/__pycache__/execution.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/execution_queue_count.cpython-310.pyc b/facefusion/uis/components/__pycache__/execution_queue_count.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdce56144def7c70e9789204e285a94ee788236f Binary files /dev/null and b/facefusion/uis/components/__pycache__/execution_queue_count.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/execution_thread_count.cpython-310.pyc b/facefusion/uis/components/__pycache__/execution_thread_count.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01084bfa060e00b4b874186f8322a5fb1b3aa519 Binary files /dev/null and b/facefusion/uis/components/__pycache__/execution_thread_count.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/face_analyser.cpython-310.pyc b/facefusion/uis/components/__pycache__/face_analyser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad2240ef6e35bc0ffef8471c7a39b7e00e14f5e7 Binary files /dev/null and b/facefusion/uis/components/__pycache__/face_analyser.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/face_masker.cpython-310.pyc b/facefusion/uis/components/__pycache__/face_masker.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ebd93159c376a3cf6bc98b1d2a8918c11a31da53 Binary files /dev/null and 
b/facefusion/uis/components/__pycache__/face_masker.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/face_selector.cpython-310.pyc b/facefusion/uis/components/__pycache__/face_selector.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..816a264d8873ec2bc041ad3655ef6c80db0a9e0f Binary files /dev/null and b/facefusion/uis/components/__pycache__/face_selector.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/frame_processors.cpython-310.pyc b/facefusion/uis/components/__pycache__/frame_processors.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bbe1d2d88436ed5c9c417d33e9fa7e762b6c9f6b Binary files /dev/null and b/facefusion/uis/components/__pycache__/frame_processors.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/frame_processors_options.cpython-310.pyc b/facefusion/uis/components/__pycache__/frame_processors_options.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74653eb8d6aee5b1e03c70f020ee32b728dc699a Binary files /dev/null and b/facefusion/uis/components/__pycache__/frame_processors_options.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/memory.cpython-310.pyc b/facefusion/uis/components/__pycache__/memory.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b22262969a100b1032cec59bd3a9ffa4572f3c81 Binary files /dev/null and b/facefusion/uis/components/__pycache__/memory.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/output.cpython-310.pyc b/facefusion/uis/components/__pycache__/output.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f4964ead29e1ff23a384cafdd31dc3ef761755d Binary files /dev/null and b/facefusion/uis/components/__pycache__/output.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/output_options.cpython-310.pyc b/facefusion/uis/components/__pycache__/output_options.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a99e7cca6235a48369b4794050b7ce4e370ba277 Binary files /dev/null and b/facefusion/uis/components/__pycache__/output_options.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/preview.cpython-310.pyc b/facefusion/uis/components/__pycache__/preview.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1372788cad9754e555c029b7d779b256d1844577 Binary files /dev/null and b/facefusion/uis/components/__pycache__/preview.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/source.cpython-310.pyc b/facefusion/uis/components/__pycache__/source.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b0d9682bf5792e986b8e9463681832dbca4bc41d Binary files /dev/null and b/facefusion/uis/components/__pycache__/source.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/target.cpython-310.pyc b/facefusion/uis/components/__pycache__/target.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..924bd950e77789a8ad7c8c7ff049585cd83a7a8a Binary files /dev/null and b/facefusion/uis/components/__pycache__/target.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/temp_frame.cpython-310.pyc b/facefusion/uis/components/__pycache__/temp_frame.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..fd416622c575aa6566d8f5fb49b6d9ab42a31ec9 Binary files /dev/null and b/facefusion/uis/components/__pycache__/temp_frame.cpython-310.pyc differ diff --git a/facefusion/uis/components/__pycache__/trim_frame.cpython-310.pyc b/facefusion/uis/components/__pycache__/trim_frame.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54b7af34c0d3048dd3c3dca1e8ca22e30559689c Binary files /dev/null and b/facefusion/uis/components/__pycache__/trim_frame.cpython-310.pyc differ diff --git a/facefusion/uis/components/about.py b/facefusion/uis/components/about.py new file mode 100644 index 0000000000000000000000000000000000000000..01ecb00d3e04eea1b9febe602d750abfddb40fd2 --- /dev/null +++ b/facefusion/uis/components/about.py @@ -0,0 +1,23 @@ +from typing import Optional +import gradio + +from facefusion import metadata, wording + +ABOUT_BUTTON : Optional[gradio.HTML] = None +DONATE_BUTTON : Optional[gradio.HTML] = None + + +def render() -> None: + global ABOUT_BUTTON + global DONATE_BUTTON + + ABOUT_BUTTON = gradio.Button( + value = metadata.get('name') + ' ' + metadata.get('version'), + variant = 'primary', + link = metadata.get('url') + ) + DONATE_BUTTON = gradio.Button( + value = wording.get('uis.donate_button'), + link = 'https://donate.facefusion.io', + size = 'sm' + ) diff --git a/facefusion/uis/components/benchmark.py b/facefusion/uis/components/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..6322febdc5dd8fc10b7a777b3c1ab73b192eec6a --- /dev/null +++ b/facefusion/uis/components/benchmark.py @@ -0,0 +1,140 @@ +from typing import Any, Optional, List, Dict, Generator +from time import sleep, perf_counter +import tempfile +import statistics +import gradio + +import facefusion.globals +from facefusion import process_manager, wording +from facefusion.face_store import clear_static_faces +from facefusion.processors.frame.core import get_frame_processors_modules +from facefusion.vision import count_video_frame_total, detect_video_resolution, detect_video_fps, pack_resolution +from facefusion.core import conditional_process +from facefusion.memory import limit_system_memory +from facefusion.filesystem import clear_temp +from facefusion.uis.core import get_ui_component + +BENCHMARK_RESULTS_DATAFRAME : Optional[gradio.Dataframe] = None +BENCHMARK_START_BUTTON : Optional[gradio.Button] = None +BENCHMARK_CLEAR_BUTTON : Optional[gradio.Button] = None +BENCHMARKS : Dict[str, str] =\ +{ + '240p': '.assets/examples/target-240p.mp4', + '360p': '.assets/examples/target-360p.mp4', + '540p': '.assets/examples/target-540p.mp4', + '720p': '.assets/examples/target-720p.mp4', + '1080p': '.assets/examples/target-1080p.mp4', + '1440p': '.assets/examples/target-1440p.mp4', + '2160p': '.assets/examples/target-2160p.mp4' +} + + +def render() -> None: + global BENCHMARK_RESULTS_DATAFRAME + global BENCHMARK_START_BUTTON + global BENCHMARK_CLEAR_BUTTON + + BENCHMARK_RESULTS_DATAFRAME = gradio.Dataframe( + label = wording.get('uis.benchmark_results_dataframe'), + headers = + [ + 'target_path', + 'benchmark_cycles', + 'average_run', + 'fastest_run', + 'slowest_run', + 'relative_fps' + ], + datatype = + [ + 'str', + 'number', + 'number', + 'number', + 'number', + 'number' + ] + ) + BENCHMARK_START_BUTTON = gradio.Button( + value = wording.get('uis.start_button'), + variant = 'primary', + size = 'sm' + ) + BENCHMARK_CLEAR_BUTTON = gradio.Button( + value = wording.get('uis.clear_button'), + size = 'sm' + ) + + +def listen() -> None: 
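+	# look up the run and cycle controls registered by benchmark_options and, only when both are present, wire the start and clear buttons to the results dataframe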
+ benchmark_runs_checkbox_group = get_ui_component('benchmark_runs_checkbox_group') + benchmark_cycles_slider = get_ui_component('benchmark_cycles_slider') + + if benchmark_runs_checkbox_group and benchmark_cycles_slider: + BENCHMARK_START_BUTTON.click(start, inputs = [ benchmark_runs_checkbox_group, benchmark_cycles_slider ], outputs = BENCHMARK_RESULTS_DATAFRAME) + BENCHMARK_CLEAR_BUTTON.click(clear, outputs = BENCHMARK_RESULTS_DATAFRAME) + + +def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[Any], None, None]: + facefusion.globals.source_paths = [ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ] + facefusion.globals.output_path = tempfile.gettempdir() + facefusion.globals.face_landmarker_score = 0 + facefusion.globals.temp_frame_format = 'bmp' + facefusion.globals.output_video_preset = 'ultrafast' + benchmark_results = [] + target_paths = [ BENCHMARKS[benchmark_run] for benchmark_run in benchmark_runs if benchmark_run in BENCHMARKS ] + + if target_paths: + pre_process() + for target_path in target_paths: + facefusion.globals.target_path = target_path + benchmark_results.append(benchmark(benchmark_cycles)) + yield benchmark_results + post_process() + + +def pre_process() -> None: + if facefusion.globals.system_memory_limit > 0: + limit_system_memory(facefusion.globals.system_memory_limit) + for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + frame_processor_module.get_frame_processor() + + +def post_process() -> None: + clear_static_faces() + + +def benchmark(benchmark_cycles : int) -> List[Any]: + process_times = [] + video_frame_total = count_video_frame_total(facefusion.globals.target_path) + output_video_resolution = detect_video_resolution(facefusion.globals.target_path) + facefusion.globals.output_video_resolution = pack_resolution(output_video_resolution) + facefusion.globals.output_video_fps = detect_video_fps(facefusion.globals.target_path) + + for index in range(benchmark_cycles): + start_time = perf_counter() + conditional_process() + end_time = perf_counter() + process_times.append(end_time - start_time) + average_run = round(statistics.mean(process_times), 2) + fastest_run = round(min(process_times), 2) + slowest_run = round(max(process_times), 2) + relative_fps = round(video_frame_total * benchmark_cycles / sum(process_times), 2) + + return\ + [ + facefusion.globals.target_path, + benchmark_cycles, + average_run, + fastest_run, + slowest_run, + relative_fps + ] + + +def clear() -> gradio.Dataframe: + while process_manager.is_processing(): + sleep(0.5) + if facefusion.globals.target_path: + clear_temp(facefusion.globals.target_path) + return gradio.Dataframe(value = None) diff --git a/facefusion/uis/components/benchmark_options.py b/facefusion/uis/components/benchmark_options.py new file mode 100644 index 0000000000000000000000000000000000000000..6748dd9b9a561b002b447dc63077e2c7ff4d2628 --- /dev/null +++ b/facefusion/uis/components/benchmark_options.py @@ -0,0 +1,29 @@ +from typing import Optional +import gradio + +from facefusion import wording +from facefusion.uis.core import register_ui_component +from facefusion.uis.components.benchmark import BENCHMARKS + +BENCHMARK_RUNS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None +BENCHMARK_CYCLES_SLIDER : Optional[gradio.Button] = None + + +def render() -> None: + global BENCHMARK_RUNS_CHECKBOX_GROUP + global BENCHMARK_CYCLES_SLIDER + + BENCHMARK_RUNS_CHECKBOX_GROUP = gradio.CheckboxGroup( + label = 
wording.get('uis.benchmark_runs_checkbox_group'), + value = list(BENCHMARKS.keys()), + choices = list(BENCHMARKS.keys()) + ) + BENCHMARK_CYCLES_SLIDER = gradio.Slider( + label = wording.get('uis.benchmark_cycles_slider'), + value = 5, + step = 1, + minimum = 1, + maximum = 10 + ) + register_ui_component('benchmark_runs_checkbox_group', BENCHMARK_RUNS_CHECKBOX_GROUP) + register_ui_component('benchmark_cycles_slider', BENCHMARK_CYCLES_SLIDER) diff --git a/facefusion/uis/components/common_options.py b/facefusion/uis/components/common_options.py new file mode 100644 index 0000000000000000000000000000000000000000..43817413df444b4cfd7170300658184b7f645d17 --- /dev/null +++ b/facefusion/uis/components/common_options.py @@ -0,0 +1,35 @@ +from typing import Optional, List +import gradio + +import facefusion.globals +from facefusion import wording +from facefusion.uis import choices as uis_choices + +COMMON_OPTIONS_CHECKBOX_GROUP : Optional[gradio.Checkboxgroup] = None + + +def render() -> None: + global COMMON_OPTIONS_CHECKBOX_GROUP + + value = [] + if facefusion.globals.keep_temp: + value.append('keep-temp') + if facefusion.globals.skip_audio: + value.append('skip-audio') + if facefusion.globals.skip_download: + value.append('skip-download') + COMMON_OPTIONS_CHECKBOX_GROUP = gradio.Checkboxgroup( + label = wording.get('uis.common_options_checkbox_group'), + choices = uis_choices.common_options, + value = value + ) + + +def listen() -> None: + COMMON_OPTIONS_CHECKBOX_GROUP.change(update, inputs = COMMON_OPTIONS_CHECKBOX_GROUP) + + +def update(common_options : List[str]) -> None: + facefusion.globals.keep_temp = 'keep-temp' in common_options + facefusion.globals.skip_audio = 'skip-audio' in common_options + facefusion.globals.skip_download = 'skip-download' in common_options diff --git a/facefusion/uis/components/execution.py b/facefusion/uis/components/execution.py new file mode 100644 index 0000000000000000000000000000000000000000..083727de6b7616367d4b895e696ef328212e57f7 --- /dev/null +++ b/facefusion/uis/components/execution.py @@ -0,0 +1,33 @@ +from typing import List, Optional +import gradio +import onnxruntime + +import facefusion.globals +from facefusion import wording +from facefusion.face_analyser import clear_face_analyser +from facefusion.processors.frame.core import clear_frame_processors_modules +from facefusion.execution import encode_execution_providers, decode_execution_providers + +EXECUTION_PROVIDERS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None + + +def render() -> None: + global EXECUTION_PROVIDERS_CHECKBOX_GROUP + + EXECUTION_PROVIDERS_CHECKBOX_GROUP = gradio.CheckboxGroup( + label = wording.get('uis.execution_providers_checkbox_group'), + choices = encode_execution_providers(onnxruntime.get_available_providers()), + value = encode_execution_providers(facefusion.globals.execution_providers) + ) + + +def listen() -> None: + EXECUTION_PROVIDERS_CHECKBOX_GROUP.change(update_execution_providers, inputs = EXECUTION_PROVIDERS_CHECKBOX_GROUP, outputs = EXECUTION_PROVIDERS_CHECKBOX_GROUP) + + +def update_execution_providers(execution_providers : List[str]) -> gradio.CheckboxGroup: + clear_face_analyser() + clear_frame_processors_modules() + execution_providers = execution_providers or encode_execution_providers(onnxruntime.get_available_providers()) + facefusion.globals.execution_providers = decode_execution_providers(execution_providers) + return gradio.CheckboxGroup(value = execution_providers) diff --git a/facefusion/uis/components/execution_queue_count.py 
b/facefusion/uis/components/execution_queue_count.py new file mode 100644 index 0000000000000000000000000000000000000000..1b6725e54901c6c007ac4ef22ae934ed3242bee9 --- /dev/null +++ b/facefusion/uis/components/execution_queue_count.py @@ -0,0 +1,28 @@ +from typing import Optional +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion import wording + +EXECUTION_QUEUE_COUNT_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global EXECUTION_QUEUE_COUNT_SLIDER + + EXECUTION_QUEUE_COUNT_SLIDER = gradio.Slider( + label = wording.get('uis.execution_queue_count_slider'), + value = facefusion.globals.execution_queue_count, + step = facefusion.choices.execution_queue_count_range[1] - facefusion.choices.execution_queue_count_range[0], + minimum = facefusion.choices.execution_queue_count_range[0], + maximum = facefusion.choices.execution_queue_count_range[-1] + ) + + +def listen() -> None: + EXECUTION_QUEUE_COUNT_SLIDER.release(update_execution_queue_count, inputs = EXECUTION_QUEUE_COUNT_SLIDER) + + +def update_execution_queue_count(execution_queue_count : int = 1) -> None: + facefusion.globals.execution_queue_count = execution_queue_count diff --git a/facefusion/uis/components/execution_thread_count.py b/facefusion/uis/components/execution_thread_count.py new file mode 100644 index 0000000000000000000000000000000000000000..4a1f46469764725565bfe068c084b8ca604d59ef --- /dev/null +++ b/facefusion/uis/components/execution_thread_count.py @@ -0,0 +1,29 @@ +from typing import Optional +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion import wording + +EXECUTION_THREAD_COUNT_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global EXECUTION_THREAD_COUNT_SLIDER + + EXECUTION_THREAD_COUNT_SLIDER = gradio.Slider( + label = wording.get('uis.execution_thread_count_slider'), + value = facefusion.globals.execution_thread_count, + step = facefusion.choices.execution_thread_count_range[1] - facefusion.choices.execution_thread_count_range[0], + minimum = facefusion.choices.execution_thread_count_range[0], + maximum = facefusion.choices.execution_thread_count_range[-1] + ) + + +def listen() -> None: + EXECUTION_THREAD_COUNT_SLIDER.release(update_execution_thread_count, inputs = EXECUTION_THREAD_COUNT_SLIDER) + + +def update_execution_thread_count(execution_thread_count : int = 1) -> None: + facefusion.globals.execution_thread_count = execution_thread_count + diff --git a/facefusion/uis/components/face_analyser.py b/facefusion/uis/components/face_analyser.py new file mode 100644 index 0000000000000000000000000000000000000000..aed04182dfe8709217a033305f8d53b326c00759 --- /dev/null +++ b/facefusion/uis/components/face_analyser.py @@ -0,0 +1,123 @@ +from typing import Optional, Dict, Any, Tuple + +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion import face_analyser, wording +from facefusion.typing import FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceDetectorModel +from facefusion.uis.core import register_ui_component + +FACE_ANALYSER_ORDER_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_ANALYSER_AGE_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_ANALYSER_GENDER_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_DETECTOR_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_DETECTOR_SIZE_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_DETECTOR_SCORE_SLIDER : Optional[gradio.Slider] = None +FACE_LANDMARKER_SCORE_SLIDER : Optional[gradio.Slider] = None + + 
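+# builds the analyser order, age and gender dropdowns, the detector model and size dropdowns and both score sliders, then registers each of them so other components can react to changes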
+def render() -> None: + global FACE_ANALYSER_ORDER_DROPDOWN + global FACE_ANALYSER_AGE_DROPDOWN + global FACE_ANALYSER_GENDER_DROPDOWN + global FACE_DETECTOR_MODEL_DROPDOWN + global FACE_DETECTOR_SIZE_DROPDOWN + global FACE_DETECTOR_SCORE_SLIDER + global FACE_LANDMARKER_SCORE_SLIDER + + face_detector_size_dropdown_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.face_detector_size_dropdown'), + 'value': facefusion.globals.face_detector_size + } + if facefusion.globals.face_detector_size in facefusion.choices.face_detector_set[facefusion.globals.face_detector_model]: + face_detector_size_dropdown_args['choices'] = facefusion.choices.face_detector_set[facefusion.globals.face_detector_model] + with gradio.Row(): + FACE_ANALYSER_ORDER_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.face_analyser_order_dropdown'), + choices = facefusion.choices.face_analyser_orders, + value = facefusion.globals.face_analyser_order + ) + FACE_ANALYSER_AGE_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.face_analyser_age_dropdown'), + choices = [ 'none' ] + facefusion.choices.face_analyser_ages, + value = facefusion.globals.face_analyser_age or 'none' + ) + FACE_ANALYSER_GENDER_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.face_analyser_gender_dropdown'), + choices = [ 'none' ] + facefusion.choices.face_analyser_genders, + value = facefusion.globals.face_analyser_gender or 'none' + ) + FACE_DETECTOR_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.face_detector_model_dropdown'), + choices = facefusion.choices.face_detector_set.keys(), + value = facefusion.globals.face_detector_model + ) + FACE_DETECTOR_SIZE_DROPDOWN = gradio.Dropdown(**face_detector_size_dropdown_args) + with gradio.Row(): + FACE_DETECTOR_SCORE_SLIDER = gradio.Slider( + label = wording.get('uis.face_detector_score_slider'), + value = facefusion.globals.face_detector_score, + step = facefusion.choices.face_detector_score_range[1] - facefusion.choices.face_detector_score_range[0], + minimum = facefusion.choices.face_detector_score_range[0], + maximum = facefusion.choices.face_detector_score_range[-1] + ) + FACE_LANDMARKER_SCORE_SLIDER = gradio.Slider( + label = wording.get('uis.face_landmarker_score_slider'), + value = facefusion.globals.face_landmarker_score, + step = facefusion.choices.face_landmarker_score_range[1] - facefusion.choices.face_landmarker_score_range[0], + minimum = facefusion.choices.face_landmarker_score_range[0], + maximum = facefusion.choices.face_landmarker_score_range[-1] + ) + register_ui_component('face_analyser_order_dropdown', FACE_ANALYSER_ORDER_DROPDOWN) + register_ui_component('face_analyser_age_dropdown', FACE_ANALYSER_AGE_DROPDOWN) + register_ui_component('face_analyser_gender_dropdown', FACE_ANALYSER_GENDER_DROPDOWN) + register_ui_component('face_detector_model_dropdown', FACE_DETECTOR_MODEL_DROPDOWN) + register_ui_component('face_detector_size_dropdown', FACE_DETECTOR_SIZE_DROPDOWN) + register_ui_component('face_detector_score_slider', FACE_DETECTOR_SCORE_SLIDER) + register_ui_component('face_landmarker_score_slider', FACE_LANDMARKER_SCORE_SLIDER) + + +def listen() -> None: + FACE_ANALYSER_ORDER_DROPDOWN.change(update_face_analyser_order, inputs = FACE_ANALYSER_ORDER_DROPDOWN) + FACE_ANALYSER_AGE_DROPDOWN.change(update_face_analyser_age, inputs = FACE_ANALYSER_AGE_DROPDOWN) + FACE_ANALYSER_GENDER_DROPDOWN.change(update_face_analyser_gender, inputs = FACE_ANALYSER_GENDER_DROPDOWN) + FACE_DETECTOR_MODEL_DROPDOWN.change(update_face_detector_model, inputs = 
FACE_DETECTOR_MODEL_DROPDOWN, outputs = [ FACE_DETECTOR_MODEL_DROPDOWN, FACE_DETECTOR_SIZE_DROPDOWN ]) + FACE_DETECTOR_SIZE_DROPDOWN.change(update_face_detector_size, inputs = FACE_DETECTOR_SIZE_DROPDOWN) + FACE_DETECTOR_SCORE_SLIDER.release(update_face_detector_score, inputs = FACE_DETECTOR_SCORE_SLIDER) + FACE_LANDMARKER_SCORE_SLIDER.release(update_face_landmarker_score, inputs = FACE_LANDMARKER_SCORE_SLIDER) + + +def update_face_analyser_order(face_analyser_order : FaceAnalyserOrder) -> None: + facefusion.globals.face_analyser_order = face_analyser_order if face_analyser_order != 'none' else None + + +def update_face_analyser_age(face_analyser_age : FaceAnalyserAge) -> None: + facefusion.globals.face_analyser_age = face_analyser_age if face_analyser_age != 'none' else None + + +def update_face_analyser_gender(face_analyser_gender : FaceAnalyserGender) -> None: + facefusion.globals.face_analyser_gender = face_analyser_gender if face_analyser_gender != 'none' else None + + +def update_face_detector_model(face_detector_model : FaceDetectorModel) -> Tuple[gradio.Dropdown, gradio.Dropdown]: + facefusion.globals.face_detector_model = face_detector_model + update_face_detector_size('640x640') + if face_analyser.pre_check(): + if facefusion.globals.face_detector_size in facefusion.choices.face_detector_set[face_detector_model]: + return gradio.Dropdown(value = facefusion.globals.face_detector_model), gradio.Dropdown(value = facefusion.globals.face_detector_size, choices = facefusion.choices.face_detector_set[face_detector_model]) + return gradio.Dropdown(value = facefusion.globals.face_detector_model), gradio.Dropdown(value = facefusion.globals.face_detector_size, choices = [ facefusion.globals.face_detector_size ]) + return gradio.Dropdown(), gradio.Dropdown() + + +def update_face_detector_size(face_detector_size : str) -> None: + facefusion.globals.face_detector_size = face_detector_size + + +def update_face_detector_score(face_detector_score : float) -> None: + facefusion.globals.face_detector_score = face_detector_score + + +def update_face_landmarker_score(face_landmarker_score : float) -> None: + facefusion.globals.face_landmarker_score = face_landmarker_score diff --git a/facefusion/uis/components/face_masker.py b/facefusion/uis/components/face_masker.py new file mode 100644 index 0000000000000000000000000000000000000000..bb1c28c96952a8afdc901e6e573a3b251c5688b7 --- /dev/null +++ b/facefusion/uis/components/face_masker.py @@ -0,0 +1,119 @@ +from typing import Optional, Tuple, List +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion import wording +from facefusion.typing import FaceMaskType, FaceMaskRegion +from facefusion.uis.core import register_ui_component + +FACE_MASK_TYPES_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None +FACE_MASK_BLUR_SLIDER : Optional[gradio.Slider] = None +FACE_MASK_BOX_GROUP : Optional[gradio.Group] = None +FACE_MASK_REGION_GROUP : Optional[gradio.Group] = None +FACE_MASK_PADDING_TOP_SLIDER : Optional[gradio.Slider] = None +FACE_MASK_PADDING_RIGHT_SLIDER : Optional[gradio.Slider] = None +FACE_MASK_PADDING_BOTTOM_SLIDER : Optional[gradio.Slider] = None +FACE_MASK_PADDING_LEFT_SLIDER : Optional[gradio.Slider] = None +FACE_MASK_REGION_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None + + +def render() -> None: + global FACE_MASK_TYPES_CHECKBOX_GROUP + global FACE_MASK_BLUR_SLIDER + global FACE_MASK_BOX_GROUP + global FACE_MASK_REGION_GROUP + global FACE_MASK_PADDING_TOP_SLIDER + global FACE_MASK_PADDING_RIGHT_SLIDER + 
global FACE_MASK_PADDING_BOTTOM_SLIDER + global FACE_MASK_PADDING_LEFT_SLIDER + global FACE_MASK_REGION_CHECKBOX_GROUP + + has_box_mask = 'box' in facefusion.globals.face_mask_types + has_region_mask = 'region' in facefusion.globals.face_mask_types + FACE_MASK_TYPES_CHECKBOX_GROUP = gradio.CheckboxGroup( + label = wording.get('uis.face_mask_types_checkbox_group'), + choices = facefusion.choices.face_mask_types, + value = facefusion.globals.face_mask_types + ) + with gradio.Group(visible = has_box_mask) as FACE_MASK_BOX_GROUP: + FACE_MASK_BLUR_SLIDER = gradio.Slider( + label = wording.get('uis.face_mask_blur_slider'), + step = facefusion.choices.face_mask_blur_range[1] - facefusion.choices.face_mask_blur_range[0], + minimum = facefusion.choices.face_mask_blur_range[0], + maximum = facefusion.choices.face_mask_blur_range[-1], + value = facefusion.globals.face_mask_blur + ) + with gradio.Row(): + FACE_MASK_PADDING_TOP_SLIDER = gradio.Slider( + label = wording.get('uis.face_mask_padding_top_slider'), + step = facefusion.choices.face_mask_padding_range[1] - facefusion.choices.face_mask_padding_range[0], + minimum = facefusion.choices.face_mask_padding_range[0], + maximum = facefusion.choices.face_mask_padding_range[-1], + value = facefusion.globals.face_mask_padding[0] + ) + FACE_MASK_PADDING_RIGHT_SLIDER = gradio.Slider( + label = wording.get('uis.face_mask_padding_right_slider'), + step = facefusion.choices.face_mask_padding_range[1] - facefusion.choices.face_mask_padding_range[0], + minimum = facefusion.choices.face_mask_padding_range[0], + maximum = facefusion.choices.face_mask_padding_range[-1], + value = facefusion.globals.face_mask_padding[1] + ) + with gradio.Row(): + FACE_MASK_PADDING_BOTTOM_SLIDER = gradio.Slider( + label = wording.get('uis.face_mask_padding_bottom_slider'), + step = facefusion.choices.face_mask_padding_range[1] - facefusion.choices.face_mask_padding_range[0], + minimum = facefusion.choices.face_mask_padding_range[0], + maximum = facefusion.choices.face_mask_padding_range[-1], + value = facefusion.globals.face_mask_padding[2] + ) + FACE_MASK_PADDING_LEFT_SLIDER = gradio.Slider( + label = wording.get('uis.face_mask_padding_left_slider'), + step = facefusion.choices.face_mask_padding_range[1] - facefusion.choices.face_mask_padding_range[0], + minimum = facefusion.choices.face_mask_padding_range[0], + maximum = facefusion.choices.face_mask_padding_range[-1], + value = facefusion.globals.face_mask_padding[3] + ) + with gradio.Row(): + FACE_MASK_REGION_CHECKBOX_GROUP = gradio.CheckboxGroup( + label = wording.get('uis.face_mask_region_checkbox_group'), + choices = facefusion.choices.face_mask_regions, + value = facefusion.globals.face_mask_regions, + visible = has_region_mask + ) + register_ui_component('face_mask_types_checkbox_group', FACE_MASK_TYPES_CHECKBOX_GROUP) + register_ui_component('face_mask_blur_slider', FACE_MASK_BLUR_SLIDER) + register_ui_component('face_mask_padding_top_slider', FACE_MASK_PADDING_TOP_SLIDER) + register_ui_component('face_mask_padding_right_slider', FACE_MASK_PADDING_RIGHT_SLIDER) + register_ui_component('face_mask_padding_bottom_slider', FACE_MASK_PADDING_BOTTOM_SLIDER) + register_ui_component('face_mask_padding_left_slider', FACE_MASK_PADDING_LEFT_SLIDER) + register_ui_component('face_mask_region_checkbox_group', FACE_MASK_REGION_CHECKBOX_GROUP) + + +def listen() -> None: + FACE_MASK_TYPES_CHECKBOX_GROUP.change(update_face_mask_type, inputs = FACE_MASK_TYPES_CHECKBOX_GROUP, outputs = [ FACE_MASK_TYPES_CHECKBOX_GROUP, FACE_MASK_BOX_GROUP, 
FACE_MASK_REGION_CHECKBOX_GROUP ]) + FACE_MASK_BLUR_SLIDER.release(update_face_mask_blur, inputs = FACE_MASK_BLUR_SLIDER) + FACE_MASK_REGION_CHECKBOX_GROUP.change(update_face_mask_regions, inputs = FACE_MASK_REGION_CHECKBOX_GROUP, outputs = FACE_MASK_REGION_CHECKBOX_GROUP) + face_mask_padding_sliders = [ FACE_MASK_PADDING_TOP_SLIDER, FACE_MASK_PADDING_RIGHT_SLIDER, FACE_MASK_PADDING_BOTTOM_SLIDER, FACE_MASK_PADDING_LEFT_SLIDER ] + for face_mask_padding_slider in face_mask_padding_sliders: + face_mask_padding_slider.release(update_face_mask_padding, inputs = face_mask_padding_sliders) + + +def update_face_mask_type(face_mask_types : List[FaceMaskType]) -> Tuple[gradio.CheckboxGroup, gradio.Group, gradio.CheckboxGroup]: + facefusion.globals.face_mask_types = face_mask_types or facefusion.choices.face_mask_types + has_box_mask = 'box' in face_mask_types + has_region_mask = 'region' in face_mask_types + return gradio.CheckboxGroup(value = facefusion.globals.face_mask_types), gradio.Group(visible = has_box_mask), gradio.CheckboxGroup(visible = has_region_mask) + + +def update_face_mask_blur(face_mask_blur : float) -> None: + facefusion.globals.face_mask_blur = face_mask_blur + + +def update_face_mask_padding(face_mask_padding_top : int, face_mask_padding_right : int, face_mask_padding_bottom : int, face_mask_padding_left : int) -> None: + facefusion.globals.face_mask_padding = (face_mask_padding_top, face_mask_padding_right, face_mask_padding_bottom, face_mask_padding_left) + + +def update_face_mask_regions(face_mask_regions : List[FaceMaskRegion]) -> gradio.CheckboxGroup: + facefusion.globals.face_mask_regions = face_mask_regions or facefusion.choices.face_mask_regions + return gradio.CheckboxGroup(value = facefusion.globals.face_mask_regions) diff --git a/facefusion/uis/components/face_selector.py b/facefusion/uis/components/face_selector.py new file mode 100644 index 0000000000000000000000000000000000000000..19fef55bac01288eab6f232ac796681d2bd58d0f --- /dev/null +++ b/facefusion/uis/components/face_selector.py @@ -0,0 +1,165 @@ +from typing import List, Optional, Tuple, Any, Dict + +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion import wording +from facefusion.face_store import clear_static_faces, clear_reference_faces +from facefusion.vision import get_video_frame, read_static_image, normalize_frame_color +from facefusion.filesystem import is_image, is_video +from facefusion.face_analyser import get_many_faces +from facefusion.typing import VisionFrame, FaceSelectorMode +from facefusion.uis.core import get_ui_component, get_ui_components, register_ui_component + +FACE_SELECTOR_MODE_DROPDOWN : Optional[gradio.Dropdown] = None +REFERENCE_FACE_POSITION_GALLERY : Optional[gradio.Gallery] = None +REFERENCE_FACE_DISTANCE_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global FACE_SELECTOR_MODE_DROPDOWN + global REFERENCE_FACE_POSITION_GALLERY + global REFERENCE_FACE_DISTANCE_SLIDER + + reference_face_gallery_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.reference_face_gallery'), + 'object_fit': 'cover', + 'columns': 8, + 'allow_preview': False, + 'visible': 'reference' in facefusion.globals.face_selector_mode + } + if is_image(facefusion.globals.target_path): + reference_frame = read_static_image(facefusion.globals.target_path) + reference_face_gallery_args['value'] = extract_gallery_frames(reference_frame) + if is_video(facefusion.globals.target_path): + reference_frame = get_video_frame(facefusion.globals.target_path, 
facefusion.globals.reference_frame_number) + reference_face_gallery_args['value'] = extract_gallery_frames(reference_frame) + FACE_SELECTOR_MODE_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.face_selector_mode_dropdown'), + choices = facefusion.choices.face_selector_modes, + value = facefusion.globals.face_selector_mode + ) + REFERENCE_FACE_POSITION_GALLERY = gradio.Gallery(**reference_face_gallery_args) + REFERENCE_FACE_DISTANCE_SLIDER = gradio.Slider( + label = wording.get('uis.reference_face_distance_slider'), + value = facefusion.globals.reference_face_distance, + step = facefusion.choices.reference_face_distance_range[1] - facefusion.choices.reference_face_distance_range[0], + minimum = facefusion.choices.reference_face_distance_range[0], + maximum = facefusion.choices.reference_face_distance_range[-1], + visible = 'reference' in facefusion.globals.face_selector_mode + ) + register_ui_component('face_selector_mode_dropdown', FACE_SELECTOR_MODE_DROPDOWN) + register_ui_component('reference_face_position_gallery', REFERENCE_FACE_POSITION_GALLERY) + register_ui_component('reference_face_distance_slider', REFERENCE_FACE_DISTANCE_SLIDER) + + +def listen() -> None: + FACE_SELECTOR_MODE_DROPDOWN.change(update_face_selector_mode, inputs = FACE_SELECTOR_MODE_DROPDOWN, outputs = [ REFERENCE_FACE_POSITION_GALLERY, REFERENCE_FACE_DISTANCE_SLIDER ]) + REFERENCE_FACE_POSITION_GALLERY.select(clear_and_update_reference_face_position) + REFERENCE_FACE_DISTANCE_SLIDER.release(update_reference_face_distance, inputs = REFERENCE_FACE_DISTANCE_SLIDER) + + for ui_component in get_ui_components( + [ + 'target_image', + 'target_video' + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(update_reference_face_position) + getattr(ui_component, method)(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + + for ui_component in get_ui_components( + [ + 'face_analyser_order_dropdown', + 'face_analyser_age_dropdown', + 'face_analyser_gender_dropdown' + ]): + ui_component.change(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + + for ui_component in get_ui_components( + [ + 'face_detector_model_dropdown', + 'face_detector_size_dropdown' + ]): + ui_component.change(clear_and_update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + + for ui_component in get_ui_components( + [ + 'face_detector_score_slider', + 'face_landmarker_score_slider' + ]): + ui_component.release(clear_and_update_reference_position_gallery, outputs=REFERENCE_FACE_POSITION_GALLERY) + + preview_frame_slider = get_ui_component('preview_frame_slider') + if preview_frame_slider: + preview_frame_slider.change(update_reference_frame_number, inputs = preview_frame_slider) + preview_frame_slider.release(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + + +def update_face_selector_mode(face_selector_mode : FaceSelectorMode) -> Tuple[gradio.Gallery, gradio.Slider]: + if face_selector_mode == 'many': + facefusion.globals.face_selector_mode = face_selector_mode + return gradio.Gallery(visible = False), gradio.Slider(visible = False) + if face_selector_mode == 'one': + facefusion.globals.face_selector_mode = face_selector_mode + return gradio.Gallery(visible = False), gradio.Slider(visible = False) + if face_selector_mode == 'reference': + facefusion.globals.face_selector_mode = face_selector_mode + return gradio.Gallery(visible = True), gradio.Slider(visible = True) + + +def 
clear_and_update_reference_face_position(event : gradio.SelectData) -> gradio.Gallery: + clear_reference_faces() + clear_static_faces() + update_reference_face_position(event.index) + return update_reference_position_gallery() + + +def update_reference_face_position(reference_face_position : int = 0) -> None: + facefusion.globals.reference_face_position = reference_face_position + + +def update_reference_face_distance(reference_face_distance : float) -> None: + facefusion.globals.reference_face_distance = reference_face_distance + + +def update_reference_frame_number(reference_frame_number : int) -> None: + facefusion.globals.reference_frame_number = reference_frame_number + + +def clear_and_update_reference_position_gallery() -> gradio.Gallery: + clear_reference_faces() + clear_static_faces() + return update_reference_position_gallery() + + +def update_reference_position_gallery() -> gradio.Gallery: + gallery_vision_frames = [] + if is_image(facefusion.globals.target_path): + temp_vision_frame = read_static_image(facefusion.globals.target_path) + gallery_vision_frames = extract_gallery_frames(temp_vision_frame) + if is_video(facefusion.globals.target_path): + temp_vision_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number) + gallery_vision_frames = extract_gallery_frames(temp_vision_frame) + if gallery_vision_frames: + return gradio.Gallery(value = gallery_vision_frames) + return gradio.Gallery(value = None) + + +def extract_gallery_frames(temp_vision_frame : VisionFrame) -> List[VisionFrame]: + gallery_vision_frames = [] + faces = get_many_faces(temp_vision_frame) + + for face in faces: + start_x, start_y, end_x, end_y = map(int, face.bounding_box) + padding_x = int((end_x - start_x) * 0.25) + padding_y = int((end_y - start_y) * 0.25) + start_x = max(0, start_x - padding_x) + start_y = max(0, start_y - padding_y) + end_x = max(0, end_x + padding_x) + end_y = max(0, end_y + padding_y) + crop_vision_frame = temp_vision_frame[start_y:end_y, start_x:end_x] + crop_vision_frame = normalize_frame_color(crop_vision_frame) + gallery_vision_frames.append(crop_vision_frame) + return gallery_vision_frames diff --git a/facefusion/uis/components/frame_processors.py b/facefusion/uis/components/frame_processors.py new file mode 100644 index 0000000000000000000000000000000000000000..4195c63b45a8ec337f8c86e39fef0b27fac67bf2 --- /dev/null +++ b/facefusion/uis/components/frame_processors.py @@ -0,0 +1,40 @@ +from typing import List, Optional +import gradio + +import facefusion.globals +from facefusion import wording +from facefusion.processors.frame.core import load_frame_processor_module, clear_frame_processors_modules +from facefusion.filesystem import list_directory +from facefusion.uis.core import register_ui_component + +FRAME_PROCESSORS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None + + +def render() -> None: + global FRAME_PROCESSORS_CHECKBOX_GROUP + + FRAME_PROCESSORS_CHECKBOX_GROUP = gradio.CheckboxGroup( + label = wording.get('uis.frame_processors_checkbox_group'), + choices = sort_frame_processors(facefusion.globals.frame_processors), + value = facefusion.globals.frame_processors + ) + register_ui_component('frame_processors_checkbox_group', FRAME_PROCESSORS_CHECKBOX_GROUP) + + +def listen() -> None: + FRAME_PROCESSORS_CHECKBOX_GROUP.change(update_frame_processors, inputs = FRAME_PROCESSORS_CHECKBOX_GROUP, outputs = FRAME_PROCESSORS_CHECKBOX_GROUP) + + +def update_frame_processors(frame_processors : List[str]) -> gradio.CheckboxGroup: + 
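+	# store the selection, clear the cached processor modules and pre-check every chosen processor; a bare CheckboxGroup is returned as soon as any pre-check fails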
facefusion.globals.frame_processors = frame_processors + clear_frame_processors_modules() + for frame_processor in frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + if not frame_processor_module.pre_check(): + return gradio.CheckboxGroup() + return gradio.CheckboxGroup(value = facefusion.globals.frame_processors, choices = sort_frame_processors(facefusion.globals.frame_processors)) + + +def sort_frame_processors(frame_processors : List[str]) -> list[str]: + available_frame_processors = list_directory('facefusion/processors/frame/modules') + return sorted(available_frame_processors, key = lambda frame_processor : frame_processors.index(frame_processor) if frame_processor in frame_processors else len(frame_processors)) diff --git a/facefusion/uis/components/frame_processors_options.py b/facefusion/uis/components/frame_processors_options.py new file mode 100644 index 0000000000000000000000000000000000000000..a705b53b451da22ebe27c8e76b743ccbf41c8d54 --- /dev/null +++ b/facefusion/uis/components/frame_processors_options.py @@ -0,0 +1,216 @@ +from typing import List, Optional, Tuple +import gradio + +import facefusion.globals +from facefusion import face_analyser, wording +from facefusion.processors.frame.core import load_frame_processor_module +from facefusion.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices +from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel +from facefusion.uis.core import get_ui_component, register_ui_component + +FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None +FACE_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_ENHANCER_BLEND_SLIDER : Optional[gradio.Slider] = None +FACE_SWAPPER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FRAME_COLORIZER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FRAME_COLORIZER_BLEND_SLIDER : Optional[gradio.Slider] = None +FRAME_COLORIZER_SIZE_DROPDOWN : Optional[gradio.Dropdown] = None +FRAME_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FRAME_ENHANCER_BLEND_SLIDER : Optional[gradio.Slider] = None +LIP_SYNCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None + + +def render() -> None: + global FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP + global FACE_ENHANCER_MODEL_DROPDOWN + global FACE_ENHANCER_BLEND_SLIDER + global FACE_SWAPPER_MODEL_DROPDOWN + global FRAME_COLORIZER_MODEL_DROPDOWN + global FRAME_COLORIZER_BLEND_SLIDER + global FRAME_COLORIZER_SIZE_DROPDOWN + global FRAME_ENHANCER_MODEL_DROPDOWN + global FRAME_ENHANCER_BLEND_SLIDER + global LIP_SYNCER_MODEL_DROPDOWN + + FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP = gradio.CheckboxGroup( + label = wording.get('uis.face_debugger_items_checkbox_group'), + choices = frame_processors_choices.face_debugger_items, + value = frame_processors_globals.face_debugger_items, + visible = 'face_debugger' in facefusion.globals.frame_processors + ) + FACE_ENHANCER_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.face_enhancer_model_dropdown'), + choices = frame_processors_choices.face_enhancer_models, + value = frame_processors_globals.face_enhancer_model, + visible = 'face_enhancer' in facefusion.globals.frame_processors + ) + FACE_ENHANCER_BLEND_SLIDER = gradio.Slider( + label = wording.get('uis.face_enhancer_blend_slider'), + value = frame_processors_globals.face_enhancer_blend, + step = frame_processors_choices.face_enhancer_blend_range[1] - 
frame_processors_choices.face_enhancer_blend_range[0], + minimum = frame_processors_choices.face_enhancer_blend_range[0], + maximum = frame_processors_choices.face_enhancer_blend_range[-1], + visible = 'face_enhancer' in facefusion.globals.frame_processors + ) + FACE_SWAPPER_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.face_swapper_model_dropdown'), + choices = frame_processors_choices.face_swapper_models, + value = frame_processors_globals.face_swapper_model, + visible = 'face_swapper' in facefusion.globals.frame_processors + ) + FRAME_COLORIZER_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.frame_colorizer_model_dropdown'), + choices = frame_processors_choices.frame_colorizer_models, + value = frame_processors_globals.frame_colorizer_model, + visible = 'frame_colorizer' in facefusion.globals.frame_processors + ) + FRAME_COLORIZER_BLEND_SLIDER = gradio.Slider( + label = wording.get('uis.frame_colorizer_blend_slider'), + value = frame_processors_globals.frame_colorizer_blend, + step = frame_processors_choices.frame_colorizer_blend_range[1] - frame_processors_choices.frame_colorizer_blend_range[0], + minimum = frame_processors_choices.frame_colorizer_blend_range[0], + maximum = frame_processors_choices.frame_colorizer_blend_range[-1], + visible = 'frame_colorizer' in facefusion.globals.frame_processors + ) + FRAME_COLORIZER_SIZE_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.frame_colorizer_size_dropdown'), + choices = frame_processors_choices.frame_colorizer_sizes, + value = frame_processors_globals.frame_colorizer_size, + visible = 'frame_colorizer' in facefusion.globals.frame_processors + ) + FRAME_ENHANCER_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.frame_enhancer_model_dropdown'), + choices = frame_processors_choices.frame_enhancer_models, + value = frame_processors_globals.frame_enhancer_model, + visible = 'frame_enhancer' in facefusion.globals.frame_processors + ) + FRAME_ENHANCER_BLEND_SLIDER = gradio.Slider( + label = wording.get('uis.frame_enhancer_blend_slider'), + value = frame_processors_globals.frame_enhancer_blend, + step = frame_processors_choices.frame_enhancer_blend_range[1] - frame_processors_choices.frame_enhancer_blend_range[0], + minimum = frame_processors_choices.frame_enhancer_blend_range[0], + maximum = frame_processors_choices.frame_enhancer_blend_range[-1], + visible = 'frame_enhancer' in facefusion.globals.frame_processors + ) + LIP_SYNCER_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.lip_syncer_model_dropdown'), + choices = frame_processors_choices.lip_syncer_models, + value = frame_processors_globals.lip_syncer_model, + visible = 'lip_syncer' in facefusion.globals.frame_processors + ) + register_ui_component('face_debugger_items_checkbox_group', FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP) + register_ui_component('face_enhancer_model_dropdown', FACE_ENHANCER_MODEL_DROPDOWN) + register_ui_component('face_enhancer_blend_slider', FACE_ENHANCER_BLEND_SLIDER) + register_ui_component('face_swapper_model_dropdown', FACE_SWAPPER_MODEL_DROPDOWN) + register_ui_component('frame_colorizer_model_dropdown', FRAME_COLORIZER_MODEL_DROPDOWN) + register_ui_component('frame_colorizer_blend_slider', FRAME_COLORIZER_BLEND_SLIDER) + register_ui_component('frame_colorizer_size_dropdown', FRAME_COLORIZER_SIZE_DROPDOWN) + register_ui_component('frame_enhancer_model_dropdown', FRAME_ENHANCER_MODEL_DROPDOWN) + register_ui_component('frame_enhancer_blend_slider', FRAME_ENHANCER_BLEND_SLIDER) + 
register_ui_component('lip_syncer_model_dropdown', LIP_SYNCER_MODEL_DROPDOWN) + + +def listen() -> None: + FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP.change(update_face_debugger_items, inputs = FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP) + FACE_ENHANCER_MODEL_DROPDOWN.change(update_face_enhancer_model, inputs = FACE_ENHANCER_MODEL_DROPDOWN, outputs = FACE_ENHANCER_MODEL_DROPDOWN) + FACE_ENHANCER_BLEND_SLIDER.release(update_face_enhancer_blend, inputs = FACE_ENHANCER_BLEND_SLIDER) + FACE_SWAPPER_MODEL_DROPDOWN.change(update_face_swapper_model, inputs = FACE_SWAPPER_MODEL_DROPDOWN, outputs = FACE_SWAPPER_MODEL_DROPDOWN) + FRAME_COLORIZER_MODEL_DROPDOWN.change(update_frame_colorizer_model, inputs = FRAME_COLORIZER_MODEL_DROPDOWN, outputs = FRAME_COLORIZER_MODEL_DROPDOWN) + FRAME_COLORIZER_BLEND_SLIDER.release(update_frame_colorizer_blend, inputs = FRAME_COLORIZER_BLEND_SLIDER) + FRAME_COLORIZER_SIZE_DROPDOWN.change(update_frame_colorizer_size, inputs = FRAME_COLORIZER_SIZE_DROPDOWN, outputs = FRAME_COLORIZER_SIZE_DROPDOWN) + FRAME_ENHANCER_MODEL_DROPDOWN.change(update_frame_enhancer_model, inputs = FRAME_ENHANCER_MODEL_DROPDOWN, outputs = FRAME_ENHANCER_MODEL_DROPDOWN) + FRAME_ENHANCER_BLEND_SLIDER.release(update_frame_enhancer_blend, inputs = FRAME_ENHANCER_BLEND_SLIDER) + LIP_SYNCER_MODEL_DROPDOWN.change(update_lip_syncer_model, inputs = LIP_SYNCER_MODEL_DROPDOWN, outputs = LIP_SYNCER_MODEL_DROPDOWN) + frame_processors_checkbox_group = get_ui_component('frame_processors_checkbox_group') + if frame_processors_checkbox_group: + frame_processors_checkbox_group.change(update_frame_processors, inputs = frame_processors_checkbox_group, outputs = [ FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP, FACE_ENHANCER_MODEL_DROPDOWN, FACE_ENHANCER_BLEND_SLIDER, FACE_SWAPPER_MODEL_DROPDOWN, FRAME_COLORIZER_MODEL_DROPDOWN, FRAME_COLORIZER_BLEND_SLIDER, FRAME_COLORIZER_SIZE_DROPDOWN, FRAME_ENHANCER_MODEL_DROPDOWN, FRAME_ENHANCER_BLEND_SLIDER, LIP_SYNCER_MODEL_DROPDOWN ]) + + +def update_frame_processors(frame_processors : List[str]) -> Tuple[gradio.CheckboxGroup, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown]: + has_face_debugger = 'face_debugger' in frame_processors + has_face_enhancer = 'face_enhancer' in frame_processors + has_face_swapper = 'face_swapper' in frame_processors + has_frame_colorizer = 'frame_colorizer' in frame_processors + has_frame_enhancer = 'frame_enhancer' in frame_processors + has_lip_syncer = 'lip_syncer' in frame_processors + return gradio.CheckboxGroup(visible = has_face_debugger), gradio.Dropdown(visible = has_face_enhancer), gradio.Slider(visible = has_face_enhancer), gradio.Dropdown(visible = has_face_swapper), gradio.Dropdown(visible = has_frame_colorizer), gradio.Slider(visible = has_frame_colorizer), gradio.Dropdown(visible = has_frame_colorizer), gradio.Dropdown(visible = has_frame_enhancer), gradio.Slider(visible = has_frame_enhancer), gradio.Dropdown(visible = has_lip_syncer) + + +def update_face_debugger_items(face_debugger_items : List[FaceDebuggerItem]) -> None: + frame_processors_globals.face_debugger_items = face_debugger_items + + +def update_face_enhancer_model(face_enhancer_model : FaceEnhancerModel) -> gradio.Dropdown: + frame_processors_globals.face_enhancer_model = face_enhancer_model + face_enhancer_module = load_frame_processor_module('face_enhancer') + face_enhancer_module.clear_frame_processor() + face_enhancer_module.set_options('model', face_enhancer_module.MODELS[face_enhancer_model]) 
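+	# the cached processor was cleared above, so a successful pre_check is required before the new model choice is confirmed in the dropdown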
+ if face_enhancer_module.pre_check(): + return gradio.Dropdown(value = frame_processors_globals.face_enhancer_model) + return gradio.Dropdown() + + +def update_face_enhancer_blend(face_enhancer_blend : int) -> None: + frame_processors_globals.face_enhancer_blend = face_enhancer_blend + + +def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> gradio.Dropdown: + frame_processors_globals.face_swapper_model = face_swapper_model + if face_swapper_model == 'blendswap_256': + facefusion.globals.face_recognizer_model = 'arcface_blendswap' + if face_swapper_model == 'inswapper_128' or face_swapper_model == 'inswapper_128_fp16': + facefusion.globals.face_recognizer_model = 'arcface_inswapper' + if face_swapper_model == 'simswap_256' or face_swapper_model == 'simswap_512_unofficial': + facefusion.globals.face_recognizer_model = 'arcface_simswap' + if face_swapper_model == 'uniface_256': + facefusion.globals.face_recognizer_model = 'arcface_uniface' + face_swapper_module = load_frame_processor_module('face_swapper') + face_swapper_module.clear_model_initializer() + face_swapper_module.clear_frame_processor() + face_swapper_module.set_options('model', face_swapper_module.MODELS[face_swapper_model]) + if face_analyser.pre_check() and face_swapper_module.pre_check(): + return gradio.Dropdown(value = frame_processors_globals.face_swapper_model) + return gradio.Dropdown() + + +def update_frame_colorizer_model(frame_colorizer_model : FrameColorizerModel) -> gradio.Dropdown: + frame_processors_globals.frame_colorizer_model = frame_colorizer_model + frame_colorizer_module = load_frame_processor_module('frame_colorizer') + frame_colorizer_module.clear_frame_processor() + frame_colorizer_module.set_options('model', frame_colorizer_module.MODELS[frame_colorizer_model]) + if frame_colorizer_module.pre_check(): + return gradio.Dropdown(value = frame_processors_globals.frame_colorizer_model) + return gradio.Dropdown() + + +def update_frame_colorizer_blend(frame_colorizer_blend : int) -> None: + frame_processors_globals.frame_colorizer_blend = frame_colorizer_blend + + +def update_frame_colorizer_size(frame_colorizer_size : str) -> gradio.Dropdown: + frame_processors_globals.frame_colorizer_size = frame_colorizer_size + return gradio.Dropdown(value = frame_processors_globals.frame_colorizer_size) + + +def update_frame_enhancer_model(frame_enhancer_model : FrameEnhancerModel) -> gradio.Dropdown: + frame_processors_globals.frame_enhancer_model = frame_enhancer_model + frame_enhancer_module = load_frame_processor_module('frame_enhancer') + frame_enhancer_module.clear_frame_processor() + frame_enhancer_module.set_options('model', frame_enhancer_module.MODELS[frame_enhancer_model]) + if frame_enhancer_module.pre_check(): + return gradio.Dropdown(value = frame_processors_globals.frame_enhancer_model) + return gradio.Dropdown() + + +def update_frame_enhancer_blend(frame_enhancer_blend : int) -> None: + frame_processors_globals.frame_enhancer_blend = frame_enhancer_blend + + +def update_lip_syncer_model(lip_syncer_model : LipSyncerModel) -> gradio.Dropdown: + frame_processors_globals.lip_syncer_model = lip_syncer_model + lip_syncer_module = load_frame_processor_module('lip_syncer') + lip_syncer_module.clear_frame_processor() + lip_syncer_module.set_options('model', lip_syncer_module.MODELS[lip_syncer_model]) + if lip_syncer_module.pre_check(): + return gradio.Dropdown(value = frame_processors_globals.lip_syncer_model) + return gradio.Dropdown() diff --git a/facefusion/uis/components/memory.py 
b/facefusion/uis/components/memory.py new file mode 100644 index 0000000000000000000000000000000000000000..f67c27ae6256b807d909aff104e740d5b4b510b8 --- /dev/null +++ b/facefusion/uis/components/memory.py @@ -0,0 +1,41 @@ +from typing import Optional +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion.typing import VideoMemoryStrategy +from facefusion import wording + +VIDEO_MEMORY_STRATEGY_DROPDOWN : Optional[gradio.Dropdown] = None +SYSTEM_MEMORY_LIMIT_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global VIDEO_MEMORY_STRATEGY_DROPDOWN + global SYSTEM_MEMORY_LIMIT_SLIDER + + VIDEO_MEMORY_STRATEGY_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.video_memory_strategy_dropdown'), + choices = facefusion.choices.video_memory_strategies, + value = facefusion.globals.video_memory_strategy + ) + SYSTEM_MEMORY_LIMIT_SLIDER = gradio.Slider( + label = wording.get('uis.system_memory_limit_slider'), + step =facefusion.choices.system_memory_limit_range[1] - facefusion.choices.system_memory_limit_range[0], + minimum = facefusion.choices.system_memory_limit_range[0], + maximum = facefusion.choices.system_memory_limit_range[-1], + value = facefusion.globals.system_memory_limit + ) + + +def listen() -> None: + VIDEO_MEMORY_STRATEGY_DROPDOWN.change(update_video_memory_strategy, inputs = VIDEO_MEMORY_STRATEGY_DROPDOWN) + SYSTEM_MEMORY_LIMIT_SLIDER.release(update_system_memory_limit, inputs = SYSTEM_MEMORY_LIMIT_SLIDER) + + +def update_video_memory_strategy(video_memory_strategy : VideoMemoryStrategy) -> None: + facefusion.globals.video_memory_strategy = video_memory_strategy + + +def update_system_memory_limit(system_memory_limit : int) -> None: + facefusion.globals.system_memory_limit = system_memory_limit diff --git a/facefusion/uis/components/output.py b/facefusion/uis/components/output.py new file mode 100644 index 0000000000000000000000000000000000000000..cfba2a6d1546eca752ca110aebbe00ab7e0e9be1 --- /dev/null +++ b/facefusion/uis/components/output.py @@ -0,0 +1,88 @@ +from typing import Tuple, Optional +from time import sleep +import gradio + +import facefusion.globals +from facefusion import process_manager, wording +from facefusion.core import conditional_process +from facefusion.memory import limit_system_memory +from facefusion.normalizer import normalize_output_path +from facefusion.uis.core import get_ui_component +from facefusion.filesystem import clear_temp, is_image, is_video + +OUTPUT_IMAGE : Optional[gradio.Image] = None +OUTPUT_VIDEO : Optional[gradio.Video] = None +OUTPUT_START_BUTTON : Optional[gradio.Button] = None +OUTPUT_CLEAR_BUTTON : Optional[gradio.Button] = None +OUTPUT_STOP_BUTTON : Optional[gradio.Button] = None + + +def render() -> None: + global OUTPUT_IMAGE + global OUTPUT_VIDEO + global OUTPUT_START_BUTTON + global OUTPUT_STOP_BUTTON + global OUTPUT_CLEAR_BUTTON + + OUTPUT_IMAGE = gradio.Image( + label = wording.get('uis.output_image_or_video'), + visible = False + ) + OUTPUT_VIDEO = gradio.Video( + label = wording.get('uis.output_image_or_video') + ) + OUTPUT_START_BUTTON = gradio.Button( + value = wording.get('uis.start_button'), + variant = 'primary', + size = 'sm' + ) + OUTPUT_STOP_BUTTON = gradio.Button( + value = wording.get('uis.stop_button'), + variant = 'primary', + size = 'sm', + visible = False + ) + OUTPUT_CLEAR_BUTTON = gradio.Button( + value = wording.get('uis.clear_button'), + size = 'sm' + ) + + +def listen() -> None: + output_path_textbox = get_ui_component('output_path_textbox') + if 
output_path_textbox: + OUTPUT_START_BUTTON.click(start, outputs = [ OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ]) + OUTPUT_START_BUTTON.click(process, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO, OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ]) + OUTPUT_STOP_BUTTON.click(stop, outputs = [ OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ]) + OUTPUT_CLEAR_BUTTON.click(clear, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO ]) + + +def start() -> Tuple[gradio.Button, gradio.Button]: + while not process_manager.is_processing(): + sleep(0.5) + return gradio.Button(visible = False), gradio.Button(visible = True) + + +def process() -> Tuple[gradio.Image, gradio.Video, gradio.Button, gradio.Button]: + normed_output_path = normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path) + if facefusion.globals.system_memory_limit > 0: + limit_system_memory(facefusion.globals.system_memory_limit) + conditional_process() + if is_image(normed_output_path): + return gradio.Image(value = normed_output_path, visible = True), gradio.Video(value = None, visible = False), gradio.Button(visible = True), gradio.Button(visible = False) + if is_video(normed_output_path): + return gradio.Image(value = None, visible = False), gradio.Video(value = normed_output_path, visible = True), gradio.Button(visible = True), gradio.Button(visible = False) + return gradio.Image(value = None), gradio.Video(value = None), gradio.Button(visible = True), gradio.Button(visible = False) + + +def stop() -> Tuple[gradio.Button, gradio.Button]: + process_manager.stop() + return gradio.Button(visible = True), gradio.Button(visible = False) + + +def clear() -> Tuple[gradio.Image, gradio.Video]: + while process_manager.is_processing(): + sleep(0.5) + if facefusion.globals.target_path: + clear_temp(facefusion.globals.target_path) + return gradio.Image(value = None), gradio.Video(value = None) diff --git a/facefusion/uis/components/output_options.py b/facefusion/uis/components/output_options.py new file mode 100644 index 0000000000000000000000000000000000000000..4919920a397a9f5865bb49a4eda5cdd2093a8c0b --- /dev/null +++ b/facefusion/uis/components/output_options.py @@ -0,0 +1,161 @@ +from typing import Optional, Tuple +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion import wording +from facefusion.typing import OutputVideoEncoder, OutputVideoPreset, Fps +from facefusion.filesystem import is_image, is_video +from facefusion.uis.core import get_ui_components, register_ui_component +from facefusion.vision import detect_image_resolution, create_image_resolutions, detect_video_fps, detect_video_resolution, create_video_resolutions, pack_resolution + +OUTPUT_PATH_TEXTBOX : Optional[gradio.Textbox] = None +OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None +OUTPUT_IMAGE_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_VIDEO_PRESET_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_VIDEO_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_VIDEO_QUALITY_SLIDER : Optional[gradio.Slider] = None +OUTPUT_VIDEO_FPS_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global OUTPUT_PATH_TEXTBOX + global OUTPUT_IMAGE_QUALITY_SLIDER + global OUTPUT_IMAGE_RESOLUTION_DROPDOWN + global OUTPUT_VIDEO_ENCODER_DROPDOWN + global OUTPUT_VIDEO_PRESET_DROPDOWN + global OUTPUT_VIDEO_RESOLUTION_DROPDOWN + global OUTPUT_VIDEO_QUALITY_SLIDER + global OUTPUT_VIDEO_FPS_SLIDER + + output_image_resolutions = [] + 
output_video_resolutions = [] + if is_image(facefusion.globals.target_path): + output_image_resolution = detect_image_resolution(facefusion.globals.target_path) + output_image_resolutions = create_image_resolutions(output_image_resolution) + if is_video(facefusion.globals.target_path): + output_video_resolution = detect_video_resolution(facefusion.globals.target_path) + output_video_resolutions = create_video_resolutions(output_video_resolution) + facefusion.globals.output_path = facefusion.globals.output_path or '.' + OUTPUT_PATH_TEXTBOX = gradio.Textbox( + label = wording.get('uis.output_path_textbox'), + value = facefusion.globals.output_path, + max_lines = 1 + ) + OUTPUT_IMAGE_QUALITY_SLIDER = gradio.Slider( + label = wording.get('uis.output_image_quality_slider'), + value = facefusion.globals.output_image_quality, + step = facefusion.choices.output_image_quality_range[1] - facefusion.choices.output_image_quality_range[0], + minimum = facefusion.choices.output_image_quality_range[0], + maximum = facefusion.choices.output_image_quality_range[-1], + visible = is_image(facefusion.globals.target_path) + ) + OUTPUT_IMAGE_RESOLUTION_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.output_image_resolution_dropdown'), + choices = output_image_resolutions, + value = facefusion.globals.output_image_resolution, + visible = is_image(facefusion.globals.target_path) + ) + OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.output_video_encoder_dropdown'), + choices = facefusion.choices.output_video_encoders, + value = facefusion.globals.output_video_encoder, + visible = is_video(facefusion.globals.target_path) + ) + OUTPUT_VIDEO_PRESET_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.output_video_preset_dropdown'), + choices = facefusion.choices.output_video_presets, + value = facefusion.globals.output_video_preset, + visible = is_video(facefusion.globals.target_path) + ) + OUTPUT_VIDEO_QUALITY_SLIDER = gradio.Slider( + label = wording.get('uis.output_video_quality_slider'), + value = facefusion.globals.output_video_quality, + step = facefusion.choices.output_video_quality_range[1] - facefusion.choices.output_video_quality_range[0], + minimum = facefusion.choices.output_video_quality_range[0], + maximum = facefusion.choices.output_video_quality_range[-1], + visible = is_video(facefusion.globals.target_path) + ) + OUTPUT_VIDEO_RESOLUTION_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.output_video_resolution_dropdown'), + choices = output_video_resolutions, + value = facefusion.globals.output_video_resolution, + visible = is_video(facefusion.globals.target_path) + ) + OUTPUT_VIDEO_FPS_SLIDER = gradio.Slider( + label = wording.get('uis.output_video_fps_slider'), + value = facefusion.globals.output_video_fps, + step = 0.01, + minimum = 1, + maximum = 60, + visible = is_video(facefusion.globals.target_path) + ) + register_ui_component('output_path_textbox', OUTPUT_PATH_TEXTBOX) + register_ui_component('output_video_fps_slider', OUTPUT_VIDEO_FPS_SLIDER) + + +def listen() -> None: + OUTPUT_PATH_TEXTBOX.change(update_output_path, inputs = OUTPUT_PATH_TEXTBOX) + OUTPUT_IMAGE_QUALITY_SLIDER.release(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER) + OUTPUT_IMAGE_RESOLUTION_DROPDOWN.change(update_output_image_resolution, inputs = OUTPUT_IMAGE_RESOLUTION_DROPDOWN) + OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN) + OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = 
OUTPUT_VIDEO_PRESET_DROPDOWN) + OUTPUT_VIDEO_QUALITY_SLIDER.release(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER) + OUTPUT_VIDEO_RESOLUTION_DROPDOWN.change(update_output_video_resolution, inputs = OUTPUT_VIDEO_RESOLUTION_DROPDOWN) + OUTPUT_VIDEO_FPS_SLIDER.release(update_output_video_fps, inputs = OUTPUT_VIDEO_FPS_SLIDER) + + for ui_component in get_ui_components( + [ + 'target_image', + 'target_video' + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) + + +def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: + if is_image(facefusion.globals.target_path): + output_image_resolution = detect_image_resolution(facefusion.globals.target_path) + output_image_resolutions = create_image_resolutions(output_image_resolution) + facefusion.globals.output_image_resolution = pack_resolution(output_image_resolution) + return gradio.Slider(visible = True), gradio.Dropdown(visible = True, value = facefusion.globals.output_image_resolution, choices = output_image_resolutions), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None) + if is_video(facefusion.globals.target_path): + output_video_resolution = detect_video_resolution(facefusion.globals.target_path) + output_video_resolutions = create_video_resolutions(output_video_resolution) + facefusion.globals.output_video_resolution = pack_resolution(output_video_resolution) + facefusion.globals.output_video_fps = detect_video_fps(facefusion.globals.target_path) + return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True, value = facefusion.globals.output_video_resolution, choices = output_video_resolutions), gradio.Slider(visible = True, value = facefusion.globals.output_video_fps) + return gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None) + + +def update_output_path(output_path : str) -> None: + facefusion.globals.output_path = output_path + + +def update_output_image_quality(output_image_quality : int) -> None: + facefusion.globals.output_image_quality = output_image_quality + + +def update_output_image_resolution(output_image_resolution : str) -> None: + facefusion.globals.output_image_resolution = output_image_resolution + + +def update_output_video_encoder(output_video_encoder: OutputVideoEncoder) -> None: + facefusion.globals.output_video_encoder = output_video_encoder + + +def update_output_video_preset(output_video_preset : OutputVideoPreset) -> None: + facefusion.globals.output_video_preset = output_video_preset + + +def update_output_video_quality(output_video_quality : int) -> None: + facefusion.globals.output_video_quality = output_video_quality + + +def update_output_video_resolution(output_video_resolution : str) -> None: 
+ facefusion.globals.output_video_resolution = output_video_resolution + + +def update_output_video_fps(output_video_fps : Fps) -> None: + facefusion.globals.output_video_fps = output_video_fps diff --git a/facefusion/uis/components/preview.py b/facefusion/uis/components/preview.py new file mode 100644 index 0000000000000000000000000000000000000000..72108cf4e56aa89df0bd073040f13f39434b3572 --- /dev/null +++ b/facefusion/uis/components/preview.py @@ -0,0 +1,207 @@ +from typing import Any, Dict, Optional +from time import sleep +import cv2 +import gradio +import numpy + +import facefusion.globals +from facefusion import logger, wording +from facefusion.audio import get_audio_frame, create_empty_audio_frame +from facefusion.common_helper import get_first +from facefusion.core import conditional_append_reference_faces +from facefusion.face_analyser import get_average_face, clear_face_analyser +from facefusion.face_store import clear_static_faces, get_reference_faces, clear_reference_faces +from facefusion.typing import Face, FaceSet, AudioFrame, VisionFrame +from facefusion.vision import get_video_frame, count_video_frame_total, normalize_frame_color, resize_frame_resolution, read_static_image, read_static_images +from facefusion.filesystem import is_image, is_video, filter_audio_paths +from facefusion.content_analyser import analyse_frame +from facefusion.processors.frame.core import load_frame_processor_module +from facefusion.uis.core import get_ui_component, get_ui_components, register_ui_component + +PREVIEW_IMAGE : Optional[gradio.Image] = None +PREVIEW_FRAME_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global PREVIEW_IMAGE + global PREVIEW_FRAME_SLIDER + + preview_image_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.preview_image'), + 'interactive': False + } + preview_frame_slider_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.preview_frame_slider'), + 'step': 1, + 'minimum': 0, + 'maximum': 100, + 'visible': False + } + conditional_append_reference_faces() + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_frames = read_static_images(facefusion.globals.source_paths) + source_face = get_average_face(source_frames) + source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) + source_audio_frame = create_empty_audio_frame() + if source_audio_path and facefusion.globals.output_video_fps and facefusion.globals.reference_frame_number: + temp_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, facefusion.globals.reference_frame_number) + if numpy.any(temp_audio_frame): + source_audio_frame = temp_audio_frame + if is_image(facefusion.globals.target_path): + target_vision_frame = read_static_image(facefusion.globals.target_path) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) + preview_image_args['value'] = normalize_frame_color(preview_vision_frame) + if is_video(facefusion.globals.target_path): + temp_vision_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, temp_vision_frame) + preview_image_args['value'] = normalize_frame_color(preview_vision_frame) + preview_image_args['visible'] = True + preview_frame_slider_args['value'] = facefusion.globals.reference_frame_number + preview_frame_slider_args['maximum'] = 
count_video_frame_total(facefusion.globals.target_path) + preview_frame_slider_args['visible'] = True + PREVIEW_IMAGE = gradio.Image(**preview_image_args) + PREVIEW_FRAME_SLIDER = gradio.Slider(**preview_frame_slider_args) + register_ui_component('preview_frame_slider', PREVIEW_FRAME_SLIDER) + + +def listen() -> None: + PREVIEW_FRAME_SLIDER.release(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + reference_face_position_gallery = get_ui_component('reference_face_position_gallery') + if reference_face_position_gallery: + reference_face_position_gallery.select(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'source_audio', + 'source_image', + 'target_image', + 'target_video' + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'target_image', + 'target_video' + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(update_preview_frame_slider, outputs = PREVIEW_FRAME_SLIDER) + + for ui_component in get_ui_components( + [ + 'face_debugger_items_checkbox_group', + 'frame_colorizer_size_dropdown', + 'face_selector_mode_dropdown', + 'face_mask_types_checkbox_group', + 'face_mask_region_checkbox_group', + 'face_analyser_order_dropdown', + 'face_analyser_age_dropdown', + 'face_analyser_gender_dropdown' + ]): + ui_component.change(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'face_enhancer_blend_slider', + 'frame_colorizer_blend_slider', + 'frame_enhancer_blend_slider', + 'trim_frame_start_slider', + 'trim_frame_end_slider', + 'reference_face_distance_slider', + 'face_mask_blur_slider', + 'face_mask_padding_top_slider', + 'face_mask_padding_bottom_slider', + 'face_mask_padding_left_slider', + 'face_mask_padding_right_slider', + 'output_video_fps_slider' + ]): + ui_component.release(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'frame_processors_checkbox_group', + 'face_enhancer_model_dropdown', + 'face_swapper_model_dropdown', + 'frame_colorizer_model_dropdown', + 'frame_enhancer_model_dropdown', + 'lip_syncer_model_dropdown', + 'face_detector_model_dropdown', + 'face_detector_size_dropdown' + ]): + ui_component.change(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ + 'face_detector_score_slider', + 'face_landmarker_score_slider' + ]): + ui_component.release(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + +def clear_and_update_preview_image(frame_number : int = 0) -> gradio.Image: + clear_face_analyser() + clear_reference_faces() + clear_static_faces() + return update_preview_image(frame_number) + + +def update_preview_image(frame_number : int = 0) -> gradio.Image: + for frame_processor in facefusion.globals.frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() + conditional_append_reference_faces() + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_frames = read_static_images(facefusion.globals.source_paths) + 
source_face = get_average_face(source_frames) + source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) + source_audio_frame = create_empty_audio_frame() + if source_audio_path and facefusion.globals.output_video_fps and facefusion.globals.reference_frame_number: + reference_audio_frame_number = facefusion.globals.reference_frame_number + if facefusion.globals.trim_frame_start: + reference_audio_frame_number -= facefusion.globals.trim_frame_start + temp_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, reference_audio_frame_number) + if numpy.any(temp_audio_frame): + source_audio_frame = temp_audio_frame + if is_image(facefusion.globals.target_path): + target_vision_frame = read_static_image(facefusion.globals.target_path) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) + preview_vision_frame = normalize_frame_color(preview_vision_frame) + return gradio.Image(value = preview_vision_frame) + if is_video(facefusion.globals.target_path): + temp_vision_frame = get_video_frame(facefusion.globals.target_path, frame_number) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, temp_vision_frame) + preview_vision_frame = normalize_frame_color(preview_vision_frame) + return gradio.Image(value = preview_vision_frame) + return gradio.Image(value = None) + + +def update_preview_frame_slider() -> gradio.Slider: + if is_video(facefusion.globals.target_path): + video_frame_total = count_video_frame_total(facefusion.globals.target_path) + return gradio.Slider(maximum = video_frame_total, visible = True) + return gradio.Slider(value = None, maximum = None, visible = False) + + +def process_preview_frame(reference_faces : FaceSet, source_face : Face, source_audio_frame : AudioFrame, target_vision_frame : VisionFrame) -> VisionFrame: + target_vision_frame = resize_frame_resolution(target_vision_frame, (640, 640)) + if analyse_frame(target_vision_frame): + return cv2.GaussianBlur(target_vision_frame, (99, 99), 0) + for frame_processor in facefusion.globals.frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + logger.disable() + if frame_processor_module.pre_process('preview'): + logger.enable() + target_vision_frame = frame_processor_module.process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'source_audio_frame': source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + return target_vision_frame diff --git a/facefusion/uis/components/source.py b/facefusion/uis/components/source.py new file mode 100644 index 0000000000000000000000000000000000000000..4e79d89db75f8a67a0c5fafba3f2175fcd871feb --- /dev/null +++ b/facefusion/uis/components/source.py @@ -0,0 +1,67 @@ +from typing import Optional, List, Tuple +import gradio + +import facefusion.globals +from facefusion import wording +from facefusion.uis.typing import File +from facefusion.common_helper import get_first +from facefusion.filesystem import has_audio, has_image, filter_audio_paths, filter_image_paths +from facefusion.uis.core import register_ui_component + +SOURCE_FILE : Optional[gradio.File] = None +SOURCE_AUDIO : Optional[gradio.Audio] = None +SOURCE_IMAGE : Optional[gradio.Image] = None + + +def render() -> None: + global SOURCE_FILE + global SOURCE_AUDIO + global SOURCE_IMAGE + + has_source_audio = has_audio(facefusion.globals.source_paths) + has_source_image = 
has_image(facefusion.globals.source_paths) + SOURCE_FILE = gradio.File( + file_count = 'multiple', + file_types = + [ + '.mp3', + '.wav', + '.png', + '.jpg', + '.webp' + ], + label = wording.get('uis.source_file'), + value = facefusion.globals.source_paths if has_source_audio or has_source_image else None + ) + source_file_names = [ source_file_value['name'] for source_file_value in SOURCE_FILE.value ] if SOURCE_FILE.value else None + source_audio_path = get_first(filter_audio_paths(source_file_names)) + source_image_path = get_first(filter_image_paths(source_file_names)) + SOURCE_AUDIO = gradio.Audio( + value = source_audio_path if has_source_audio else None, + visible = has_source_audio, + show_label = False + ) + SOURCE_IMAGE = gradio.Image( + value = source_image_path if has_source_image else None, + visible = has_source_image, + show_label = False + ) + register_ui_component('source_audio', SOURCE_AUDIO) + register_ui_component('source_image', SOURCE_IMAGE) + + +def listen() -> None: + SOURCE_FILE.change(update, inputs = SOURCE_FILE, outputs = [ SOURCE_AUDIO, SOURCE_IMAGE ]) + + +def update(files : List[File]) -> Tuple[gradio.Audio, gradio.Image]: + file_names = [ file.name for file in files ] if files else None + has_source_audio = has_audio(file_names) + has_source_image = has_image(file_names) + if has_source_audio or has_source_image: + source_audio_path = get_first(filter_audio_paths(file_names)) + source_image_path = get_first(filter_image_paths(file_names)) + facefusion.globals.source_paths = file_names + return gradio.Audio(value = source_audio_path, visible = has_source_audio), gradio.Image(value = source_image_path, visible = has_source_image) + facefusion.globals.source_paths = None + return gradio.Audio(value = None, visible = False), gradio.Image(value = None, visible = False) diff --git a/facefusion/uis/components/target.py b/facefusion/uis/components/target.py new file mode 100644 index 0000000000000000000000000000000000000000..2d11d71339afbfdbdc359dc6b5b2c49c44da1c61 --- /dev/null +++ b/facefusion/uis/components/target.py @@ -0,0 +1,63 @@ +from typing import Tuple, Optional +import gradio + +import facefusion.globals +from facefusion import wording +from facefusion.face_store import clear_static_faces, clear_reference_faces +from facefusion.uis.typing import File +from facefusion.filesystem import is_image, is_video +from facefusion.uis.core import register_ui_component + +TARGET_FILE : Optional[gradio.File] = None +TARGET_IMAGE : Optional[gradio.Image] = None +TARGET_VIDEO : Optional[gradio.Video] = None + + +def render() -> None: + global TARGET_FILE + global TARGET_IMAGE + global TARGET_VIDEO + + is_target_image = is_image(facefusion.globals.target_path) + is_target_video = is_video(facefusion.globals.target_path) + TARGET_FILE = gradio.File( + label = wording.get('uis.target_file'), + file_count = 'single', + file_types = + [ + '.png', + '.jpg', + '.webp', + '.mp4' + ], + value = facefusion.globals.target_path if is_target_image or is_target_video else None + ) + TARGET_IMAGE = gradio.Image( + value = TARGET_FILE.value['name'] if is_target_image else None, + visible = is_target_image, + show_label = False + ) + TARGET_VIDEO = gradio.Video( + value = TARGET_FILE.value['name'] if is_target_video else None, + visible = is_target_video, + show_label = False + ) + register_ui_component('target_image', TARGET_IMAGE) + register_ui_component('target_video', TARGET_VIDEO) + + +def listen() -> None: + TARGET_FILE.change(update, inputs = TARGET_FILE, outputs = [ TARGET_IMAGE, 
TARGET_VIDEO ]) + + +def update(file : File) -> Tuple[gradio.Image, gradio.Video]: + clear_reference_faces() + clear_static_faces() + if file and is_image(file.name): + facefusion.globals.target_path = file.name + return gradio.Image(value = file.name, visible = True), gradio.Video(value = None, visible = False) + if file and is_video(file.name): + facefusion.globals.target_path = file.name + return gradio.Image(value = None, visible = False), gradio.Video(value = file.name, visible = True) + facefusion.globals.target_path = None + return gradio.Image(value = None, visible = False), gradio.Video(value = None, visible = False) diff --git a/facefusion/uis/components/temp_frame.py b/facefusion/uis/components/temp_frame.py new file mode 100644 index 0000000000000000000000000000000000000000..a601653fa3814efd4f6ac68e21bb319f7f6b5762 --- /dev/null +++ b/facefusion/uis/components/temp_frame.py @@ -0,0 +1,41 @@ +from typing import Optional +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion import wording +from facefusion.typing import TempFrameFormat +from facefusion.filesystem import is_video +from facefusion.uis.core import get_ui_component + +TEMP_FRAME_FORMAT_DROPDOWN : Optional[gradio.Dropdown] = None + + +def render() -> None: + global TEMP_FRAME_FORMAT_DROPDOWN + + TEMP_FRAME_FORMAT_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.temp_frame_format_dropdown'), + choices = facefusion.choices.temp_frame_formats, + value = facefusion.globals.temp_frame_format, + visible = is_video(facefusion.globals.target_path) + ) + + +def listen() -> None: + TEMP_FRAME_FORMAT_DROPDOWN.change(update_temp_frame_format, inputs = TEMP_FRAME_FORMAT_DROPDOWN) + target_video = get_ui_component('target_video') + if target_video: + for method in [ 'upload', 'change', 'clear' ]: + getattr(target_video, method)(remote_update, outputs = TEMP_FRAME_FORMAT_DROPDOWN) + + +def remote_update() -> gradio.Dropdown: + if is_video(facefusion.globals.target_path): + return gradio.Dropdown(visible = True) + return gradio.Dropdown(visible = False) + + +def update_temp_frame_format(temp_frame_format : TempFrameFormat) -> None: + facefusion.globals.temp_frame_format = temp_frame_format + diff --git a/facefusion/uis/components/trim_frame.py b/facefusion/uis/components/trim_frame.py new file mode 100644 index 0000000000000000000000000000000000000000..3a33b3509b5d041fe9b9c3e2cd619abd3fd9b29d --- /dev/null +++ b/facefusion/uis/components/trim_frame.py @@ -0,0 +1,73 @@ +from typing import Any, Dict, Tuple, Optional +import gradio + +import facefusion.globals +from facefusion import wording +from facefusion.vision import count_video_frame_total +from facefusion.filesystem import is_video +from facefusion.uis.core import get_ui_component, register_ui_component + +TRIM_FRAME_START_SLIDER : Optional[gradio.Slider] = None +TRIM_FRAME_END_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global TRIM_FRAME_START_SLIDER + global TRIM_FRAME_END_SLIDER + + trim_frame_start_slider_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.trim_frame_start_slider'), + 'step': 1, + 'minimum': 0, + 'maximum': 100, + 'visible': False + } + trim_frame_end_slider_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.trim_frame_end_slider'), + 'step': 1, + 'minimum': 0, + 'maximum': 100, + 'visible': False + } + if is_video(facefusion.globals.target_path): + video_frame_total = count_video_frame_total(facefusion.globals.target_path) + trim_frame_start_slider_args['value'] = 
facefusion.globals.trim_frame_start or 0 + trim_frame_start_slider_args['maximum'] = video_frame_total + trim_frame_start_slider_args['visible'] = True + trim_frame_end_slider_args['value'] = facefusion.globals.trim_frame_end or video_frame_total + trim_frame_end_slider_args['maximum'] = video_frame_total + trim_frame_end_slider_args['visible'] = True + with gradio.Row(): + TRIM_FRAME_START_SLIDER = gradio.Slider(**trim_frame_start_slider_args) + TRIM_FRAME_END_SLIDER = gradio.Slider(**trim_frame_end_slider_args) + register_ui_component('trim_frame_start_slider', TRIM_FRAME_START_SLIDER) + register_ui_component('trim_frame_end_slider', TRIM_FRAME_END_SLIDER) + + +def listen() -> None: + TRIM_FRAME_START_SLIDER.release(update_trim_frame_start, inputs = TRIM_FRAME_START_SLIDER) + TRIM_FRAME_END_SLIDER.release(update_trim_frame_end, inputs = TRIM_FRAME_END_SLIDER) + target_video = get_ui_component('target_video') + if target_video: + for method in [ 'upload', 'change', 'clear' ]: + getattr(target_video, method)(remote_update, outputs = [ TRIM_FRAME_START_SLIDER, TRIM_FRAME_END_SLIDER ]) + + +def remote_update() -> Tuple[gradio.Slider, gradio.Slider]: + if is_video(facefusion.globals.target_path): + video_frame_total = count_video_frame_total(facefusion.globals.target_path) + facefusion.globals.trim_frame_start = None + facefusion.globals.trim_frame_end = None + return gradio.Slider(value = 0, maximum = video_frame_total, visible = True), gradio.Slider(value = video_frame_total, maximum = video_frame_total, visible = True) + return gradio.Slider(value = None, maximum = None, visible = False), gradio.Slider(value = None, maximum = None, visible = False) + + +def update_trim_frame_start(trim_frame_start : int) -> None: + facefusion.globals.trim_frame_start = trim_frame_start if trim_frame_start > 0 else None + + +def update_trim_frame_end(trim_frame_end : int) -> None: + video_frame_total = count_video_frame_total(facefusion.globals.target_path) + facefusion.globals.trim_frame_end = trim_frame_end if trim_frame_end < video_frame_total else None diff --git a/facefusion/uis/components/webcam.py b/facefusion/uis/components/webcam.py new file mode 100644 index 0000000000000000000000000000000000000000..e49432a1be1e547c7b6b56e6035acf3310721b3e --- /dev/null +++ b/facefusion/uis/components/webcam.py @@ -0,0 +1,180 @@ +from typing import Optional, Generator, Deque +import os +import platform +import subprocess +import cv2 +import gradio +from time import sleep +from concurrent.futures import ThreadPoolExecutor +from collections import deque +from tqdm import tqdm + +import facefusion.globals +from facefusion import logger, wording +from facefusion.audio import create_empty_audio_frame +from facefusion.content_analyser import analyse_stream +from facefusion.filesystem import filter_image_paths +from facefusion.typing import VisionFrame, Face, Fps +from facefusion.face_analyser import get_average_face +from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module +from facefusion.ffmpeg import open_ffmpeg +from facefusion.vision import normalize_frame_color, read_static_images, unpack_resolution +from facefusion.uis.typing import StreamMode, WebcamMode +from facefusion.uis.core import get_ui_component, get_ui_components + +WEBCAM_CAPTURE : Optional[cv2.VideoCapture] = None +WEBCAM_IMAGE : Optional[gradio.Image] = None +WEBCAM_START_BUTTON : Optional[gradio.Button] = None +WEBCAM_STOP_BUTTON : Optional[gradio.Button] = None + + +def get_webcam_capture() -> 
Optional[cv2.VideoCapture]: + global WEBCAM_CAPTURE + + if WEBCAM_CAPTURE is None: + if platform.system().lower() == 'windows': + webcam_capture = cv2.VideoCapture(0, cv2.CAP_DSHOW) + else: + webcam_capture = cv2.VideoCapture(0) + if webcam_capture and webcam_capture.isOpened(): + WEBCAM_CAPTURE = webcam_capture + return WEBCAM_CAPTURE + + +def clear_webcam_capture() -> None: + global WEBCAM_CAPTURE + + if WEBCAM_CAPTURE: + WEBCAM_CAPTURE.release() + WEBCAM_CAPTURE = None + + +def render() -> None: + global WEBCAM_IMAGE + global WEBCAM_START_BUTTON + global WEBCAM_STOP_BUTTON + + WEBCAM_IMAGE = gradio.Image( + label = wording.get('uis.webcam_image') + ) + WEBCAM_START_BUTTON = gradio.Button( + value = wording.get('uis.start_button'), + variant = 'primary', + size = 'sm' + ) + WEBCAM_STOP_BUTTON = gradio.Button( + value = wording.get('uis.stop_button'), + size = 'sm' + ) + + +def listen() -> None: + start_event = None + webcam_mode_radio = get_ui_component('webcam_mode_radio') + webcam_resolution_dropdown = get_ui_component('webcam_resolution_dropdown') + webcam_fps_slider = get_ui_component('webcam_fps_slider') + if webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider: + start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE) + WEBCAM_STOP_BUTTON.click(stop, cancels = start_event) + + for ui_component in get_ui_components( + [ + 'frame_processors_checkbox_group', + 'face_swapper_model_dropdown', + 'face_enhancer_model_dropdown', + 'frame_enhancer_model_dropdown', + 'lip_syncer_model_dropdown', + 'source_image' + ]): + ui_component.change(update, cancels = start_event) + + +def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: + facefusion.globals.face_selector_mode = 'one' + facefusion.globals.face_analyser_order = 'large-small' + source_image_paths = filter_image_paths(facefusion.globals.source_paths) + source_frames = read_static_images(source_image_paths) + source_face = get_average_face(source_frames) + stream = None + + if webcam_mode in [ 'udp', 'v4l2' ]: + stream = open_stream(webcam_mode, webcam_resolution, webcam_fps) # type: ignore[arg-type] + webcam_width, webcam_height = unpack_resolution(webcam_resolution) + webcam_capture = get_webcam_capture() + if webcam_capture and webcam_capture.isOpened(): + webcam_capture.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) # type: ignore[attr-defined] + webcam_capture.set(cv2.CAP_PROP_FRAME_WIDTH, webcam_width) + webcam_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, webcam_height) + webcam_capture.set(cv2.CAP_PROP_FPS, webcam_fps) + for capture_frame in multi_process_capture(source_face, webcam_capture, webcam_fps): + if webcam_mode == 'inline': + yield normalize_frame_color(capture_frame) + else: + try: + stream.stdin.write(capture_frame.tobytes()) + except Exception: + clear_webcam_capture() + yield None + + +def multi_process_capture(source_face : Face, webcam_capture : cv2.VideoCapture, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: + with tqdm(desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: + with ThreadPoolExecutor(max_workers = facefusion.globals.execution_thread_count) as executor: + futures = [] + deque_capture_frames : Deque[VisionFrame] = deque() + while webcam_capture and webcam_capture.isOpened(): + _, capture_frame = webcam_capture.read() + if 
analyse_stream(capture_frame, webcam_fps): + return + future = executor.submit(process_stream_frame, source_face, capture_frame) + futures.append(future) + for future_done in [ future for future in futures if future.done() ]: + capture_frame = future_done.result() + deque_capture_frames.append(capture_frame) + futures.remove(future_done) + while deque_capture_frames: + progress.update() + yield deque_capture_frames.popleft() + + +def update() -> None: + for frame_processor in facefusion.globals.frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() + + +def stop() -> gradio.Image: + clear_webcam_capture() + return gradio.Image(value = None) + + +def process_stream_frame(source_face : Face, target_vision_frame : VisionFrame) -> VisionFrame: + source_audio_frame = create_empty_audio_frame() + for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + logger.disable() + if frame_processor_module.pre_process('stream'): + logger.enable() + target_vision_frame = frame_processor_module.process_frame( + { + 'source_face': source_face, + 'source_audio_frame': source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + return target_vision_frame + + +def open_stream(stream_mode : StreamMode, stream_resolution : str, stream_fps : Fps) -> subprocess.Popen[bytes]: + commands = [ '-f', 'rawvideo', '-pix_fmt', 'bgr24', '-s', stream_resolution, '-r', str(stream_fps), '-i', '-'] + if stream_mode == 'udp': + commands.extend([ '-b:v', '2000k', '-f', 'mpegts', 'udp://localhost:27000?pkt_size=1316' ]) + if stream_mode == 'v4l2': + try: + device_name = os.listdir('/sys/devices/virtual/video4linux')[0] + if device_name: + commands.extend([ '-f', 'v4l2', '/dev/' + device_name ]) + except FileNotFoundError: + logger.error(wording.get('stream_not_loaded').format(stream_mode = stream_mode), __name__.upper()) + return open_ffmpeg(commands) diff --git a/facefusion/uis/components/webcam_options.py b/facefusion/uis/components/webcam_options.py new file mode 100644 index 0000000000000000000000000000000000000000..ea707b05e23d714dec53ff0f90e4c279fb6a7230 --- /dev/null +++ b/facefusion/uis/components/webcam_options.py @@ -0,0 +1,37 @@ +from typing import Optional +import gradio + +from facefusion import wording +from facefusion.uis import choices as uis_choices +from facefusion.uis.core import register_ui_component + +WEBCAM_MODE_RADIO : Optional[gradio.Radio] = None +WEBCAM_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None +WEBCAM_FPS_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global WEBCAM_MODE_RADIO + global WEBCAM_RESOLUTION_DROPDOWN + global WEBCAM_FPS_SLIDER + + WEBCAM_MODE_RADIO = gradio.Radio( + label = wording.get('uis.webcam_mode_radio'), + choices = uis_choices.webcam_modes, + value = 'inline' + ) + WEBCAM_RESOLUTION_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.webcam_resolution_dropdown'), + choices = uis_choices.webcam_resolutions, + value = uis_choices.webcam_resolutions[0] + ) + WEBCAM_FPS_SLIDER = gradio.Slider( + label = wording.get('uis.webcam_fps_slider'), + value = 25, + step = 1, + minimum = 1, + maximum = 60 + ) + register_ui_component('webcam_mode_radio', WEBCAM_MODE_RADIO) + register_ui_component('webcam_resolution_dropdown', WEBCAM_RESOLUTION_DROPDOWN) + register_ui_component('webcam_fps_slider', WEBCAM_FPS_SLIDER) diff --git a/facefusion/uis/core.py b/facefusion/uis/core.py 
new file mode 100644 index 0000000000000000000000000000000000000000..e5d2e6ff1d12244c0fa6c597b4cfe46b0d16d122 --- /dev/null +++ b/facefusion/uis/core.py @@ -0,0 +1,153 @@ +from typing import Dict, Optional, Any, List +from types import ModuleType +import importlib +import sys +import gradio + +import facefusion.globals +from facefusion.uis import overrides +from facefusion import metadata, logger, wording +from facefusion.uis.typing import Component, ComponentName +from facefusion.filesystem import resolve_relative_path + +gradio.processing_utils.encode_array_to_base64 = overrides.encode_array_to_base64 +gradio.processing_utils.encode_pil_to_base64 = overrides.encode_pil_to_base64 + +UI_COMPONENTS: Dict[ComponentName, Component] = {} +UI_LAYOUT_MODULES : List[ModuleType] = [] +UI_LAYOUT_METHODS =\ +[ + 'pre_check', + 'pre_render', + 'render', + 'listen', + 'run' +] + + +def load_ui_layout_module(ui_layout : str) -> Any: + try: + ui_layout_module = importlib.import_module('facefusion.uis.layouts.' + ui_layout) + for method_name in UI_LAYOUT_METHODS: + if not hasattr(ui_layout_module, method_name): + raise NotImplementedError + except ModuleNotFoundError as exception: + logger.error(wording.get('ui_layout_not_loaded').format(ui_layout = ui_layout), __name__.upper()) + logger.debug(exception.msg, __name__.upper()) + sys.exit(1) + except NotImplementedError: + logger.error(wording.get('ui_layout_not_implemented').format(ui_layout = ui_layout), __name__.upper()) + sys.exit(1) + return ui_layout_module + + +def get_ui_layouts_modules(ui_layouts : List[str]) -> List[ModuleType]: + global UI_LAYOUT_MODULES + + if not UI_LAYOUT_MODULES: + for ui_layout in ui_layouts: + ui_layout_module = load_ui_layout_module(ui_layout) + UI_LAYOUT_MODULES.append(ui_layout_module) + return UI_LAYOUT_MODULES + + +def get_ui_component(component_name : ComponentName) -> Optional[Component]: + if component_name in UI_COMPONENTS: + return UI_COMPONENTS[component_name] + return None + + +def get_ui_components(component_names : List[ComponentName]) -> Optional[List[Component]]: + ui_components = [] + + for component_name in component_names: + component = get_ui_component(component_name) + if component: + ui_components.append(component) + return ui_components + + +def register_ui_component(component_name : ComponentName, component: Component) -> None: + UI_COMPONENTS[component_name] = component + + +def launch() -> None: + ui_layouts_total = len(facefusion.globals.ui_layouts) + with gradio.Blocks(theme = get_theme(), css = get_css(), title = metadata.get('name') + ' ' + metadata.get('version')) as ui: + for ui_layout in facefusion.globals.ui_layouts: + ui_layout_module = load_ui_layout_module(ui_layout) + if ui_layout_module.pre_render(): + if ui_layouts_total > 1: + with gradio.Tab(ui_layout): + ui_layout_module.render() + ui_layout_module.listen() + else: + ui_layout_module.render() + ui_layout_module.listen() + + for ui_layout in facefusion.globals.ui_layouts: + ui_layout_module = load_ui_layout_module(ui_layout) + ui_layout_module.run(ui) + + +def get_theme() -> gradio.Theme: + return gradio.themes.Base( + primary_hue = gradio.themes.colors.red, + secondary_hue = gradio.themes.colors.neutral, + font = gradio.themes.GoogleFont('Open Sans') + ).set( + background_fill_primary = '*neutral_100', + block_background_fill = 'white', + block_border_width = '0', + block_label_background_fill = '*primary_100', + block_label_background_fill_dark = '*primary_600', + block_label_border_width = 'none', + block_label_margin = 
'0.5rem', + block_label_radius = '*radius_md', + block_label_text_color = '*primary_500', + block_label_text_color_dark = 'white', + block_label_text_weight = '600', + block_title_background_fill = '*primary_100', + block_title_background_fill_dark = '*primary_600', + block_title_padding = '*block_label_padding', + block_title_radius = '*block_label_radius', + block_title_text_color = '*primary_500', + block_title_text_size = '*text_sm', + block_title_text_weight = '600', + block_padding = '0.5rem', + border_color_primary = 'transparent', + border_color_primary_dark = 'transparent', + button_large_padding = '2rem 0.5rem', + button_large_text_weight = 'normal', + button_primary_background_fill = '*primary_500', + button_primary_text_color = 'white', + button_secondary_background_fill = 'white', + button_secondary_border_color = 'transparent', + button_secondary_border_color_dark = 'transparent', + button_secondary_border_color_hover = 'transparent', + button_secondary_border_color_hover_dark = 'transparent', + button_secondary_text_color = '*neutral_800', + button_small_padding = '0.75rem', + checkbox_background_color = '*neutral_200', + checkbox_background_color_selected = '*primary_600', + checkbox_background_color_selected_dark = '*primary_700', + checkbox_border_color_focus = '*primary_500', + checkbox_border_color_focus_dark = '*primary_600', + checkbox_border_color_selected = '*primary_600', + checkbox_border_color_selected_dark = '*primary_700', + checkbox_label_background_fill = '*neutral_50', + checkbox_label_background_fill_hover = '*neutral_50', + checkbox_label_background_fill_selected = '*primary_500', + checkbox_label_background_fill_selected_dark = '*primary_600', + checkbox_label_text_color_selected = 'white', + input_background_fill = '*neutral_50', + shadow_drop = 'none', + slider_color = '*primary_500', + slider_color_dark = '*primary_600' + ) + + +def get_css() -> str: + fixes_css_path = resolve_relative_path('uis/assets/fixes.css') + overrides_css_path = resolve_relative_path('uis/assets/overrides.css') + return open(fixes_css_path, 'r').read() + open(overrides_css_path, 'r').read() diff --git a/facefusion/uis/layouts/__pycache__/default.cpython-310.pyc b/facefusion/uis/layouts/__pycache__/default.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e971148b6ef1da8c0e51d49d17bc922bf1cc6bbf Binary files /dev/null and b/facefusion/uis/layouts/__pycache__/default.cpython-310.pyc differ diff --git a/facefusion/uis/layouts/benchmark.py b/facefusion/uis/layouts/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..f70f37ae68ac5f031b174d1f39c1a9fe3478cd6c --- /dev/null +++ b/facefusion/uis/layouts/benchmark.py @@ -0,0 +1,67 @@ +import multiprocessing +import gradio + +import facefusion.globals +from facefusion.download import conditional_download +from facefusion.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, execution_queue_count, memory, benchmark_options, benchmark + + +def pre_check() -> bool: + if not facefusion.globals.skip_download: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-360p.mp4', + 
'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-540p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-720p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1080p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1440p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-2160p.mp4' + ]) + return True + return False + + +def pre_render() -> bool: + return True + + +def render() -> gradio.Blocks: + with gradio.Blocks() as layout: + with gradio.Row(): + with gradio.Column(scale = 2): + with gradio.Blocks(): + about.render() + with gradio.Blocks(): + frame_processors.render() + with gradio.Blocks(): + frame_processors_options.render() + with gradio.Blocks(): + execution.render() + execution_thread_count.render() + execution_queue_count.render() + with gradio.Blocks(): + memory.render() + with gradio.Blocks(): + benchmark_options.render() + with gradio.Column(scale = 5): + with gradio.Blocks(): + benchmark.render() + return layout + + +def listen() -> None: + frame_processors.listen() + frame_processors_options.listen() + execution.listen() + execution_thread_count.listen() + execution_queue_count.listen() + memory.listen() + benchmark.listen() + + +def run(ui : gradio.Blocks) -> None: + concurrency_count = min(2, multiprocessing.cpu_count()) + ui.queue(concurrency_count = concurrency_count).launch(show_api = False, quiet = True) diff --git a/facefusion/uis/layouts/default.py b/facefusion/uis/layouts/default.py new file mode 100644 index 0000000000000000000000000000000000000000..eb3f07dcc824030a497d63d67d763f3e1396b2ee --- /dev/null +++ b/facefusion/uis/layouts/default.py @@ -0,0 +1,80 @@ +import multiprocessing +import gradio + +from facefusion.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, execution_queue_count, memory, temp_frame, output_options, common_options, source, target, output, preview, trim_frame, face_analyser, face_selector, face_masker + + +def pre_check() -> bool: + return True + + +def pre_render() -> bool: + return True + + +def render() -> gradio.Blocks: + with gradio.Blocks() as layout: + with gradio.Row(): + with gradio.Column(scale = 2): + with gradio.Blocks(): + about.render() + with gradio.Blocks(): + frame_processors.render() + with gradio.Blocks(): + frame_processors_options.render() + with gradio.Blocks(): + execution.render() + execution_thread_count.render() + execution_queue_count.render() + with gradio.Blocks(): + memory.render() + with gradio.Blocks(): + temp_frame.render() + with gradio.Blocks(): + output_options.render() + with gradio.Column(scale = 2): + with gradio.Blocks(): + source.render() + with gradio.Blocks(): + target.render() + with gradio.Blocks(): + output.render() + with gradio.Column(scale = 3): + with gradio.Blocks(): + preview.render() + with gradio.Blocks(): + trim_frame.render() + with gradio.Blocks(): + face_selector.render() + with gradio.Blocks(): + face_masker.render() + with gradio.Blocks(): + face_analyser.render() + with gradio.Blocks(): + common_options.render() + return layout + + +def listen() -> None: + frame_processors.listen() + frame_processors_options.listen() + execution.listen() + execution_thread_count.listen() + execution_queue_count.listen() + memory.listen() + temp_frame.listen() + output_options.listen() + source.listen() + target.listen() + output.listen() + 
preview.listen() + trim_frame.listen() + face_selector.listen() + face_masker.listen() + face_analyser.listen() + common_options.listen() + + +def run(ui : gradio.Blocks) -> None: + concurrency_count = min(8, multiprocessing.cpu_count()) + ui.queue(concurrency_count = concurrency_count).launch(show_api = False, quiet = True) diff --git a/facefusion/uis/layouts/webcam.py b/facefusion/uis/layouts/webcam.py new file mode 100644 index 0000000000000000000000000000000000000000..016a0486ff4b8cc0877eebf5fab4989f456b4504 --- /dev/null +++ b/facefusion/uis/layouts/webcam.py @@ -0,0 +1,49 @@ +import multiprocessing +import gradio + +from facefusion.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, webcam_options, source, webcam + + +def pre_check() -> bool: + return True + + +def pre_render() -> bool: + return True + + +def render() -> gradio.Blocks: + with gradio.Blocks() as layout: + with gradio.Row(): + with gradio.Column(scale = 2): + with gradio.Blocks(): + about.render() + with gradio.Blocks(): + frame_processors.render() + with gradio.Blocks(): + frame_processors_options.render() + with gradio.Blocks(): + execution.render() + execution_thread_count.render() + with gradio.Blocks(): + webcam_options.render() + with gradio.Blocks(): + source.render() + with gradio.Column(scale = 5): + with gradio.Blocks(): + webcam.render() + return layout + + +def listen() -> None: + frame_processors.listen() + frame_processors_options.listen() + execution.listen() + execution_thread_count.listen() + source.listen() + webcam.listen() + + +def run(ui : gradio.Blocks) -> None: + concurrency_count = min(2, multiprocessing.cpu_count()) + ui.queue(concurrency_count = concurrency_count).launch(show_api = False, quiet = True) diff --git a/facefusion/uis/overrides.py b/facefusion/uis/overrides.py new file mode 100644 index 0000000000000000000000000000000000000000..7f3c4707499aedec1436422f2e882b279862e84a --- /dev/null +++ b/facefusion/uis/overrides.py @@ -0,0 +1,13 @@ +from typing import Any +import cv2 +import numpy +import base64 + + +def encode_array_to_base64(array : numpy.ndarray[Any, Any]) -> str: + buffer = cv2.imencode('.jpg', array[:, :, ::-1])[1] + return 'data:image/jpeg;base64,' + base64.b64encode(buffer.tobytes()).decode('utf-8') + + +def encode_pil_to_base64(image : Any) -> str: + return encode_array_to_base64(numpy.asarray(image)[:, :, ::-1]) diff --git a/facefusion/uis/typing.py b/facefusion/uis/typing.py new file mode 100644 index 0000000000000000000000000000000000000000..59d06f539c61dd08946c4ee3ace4634174b45502 --- /dev/null +++ b/facefusion/uis/typing.py @@ -0,0 +1,53 @@ +from typing import Literal, Any, IO +import gradio + +File = IO[Any] +Component = gradio.File or gradio.Image or gradio.Video or gradio.Slider +ComponentName = Literal\ +[ + 'source_audio', + 'source_image', + 'target_image', + 'target_video', + 'preview_frame_slider', + 'trim_frame_start_slider', + 'trim_frame_end_slider', + 'face_selector_mode_dropdown', + 'reference_face_position_gallery', + 'reference_face_distance_slider', + 'face_analyser_order_dropdown', + 'face_analyser_age_dropdown', + 'face_analyser_gender_dropdown', + 'face_detector_model_dropdown', + 'face_detector_size_dropdown', + 'face_detector_score_slider', + 'face_landmarker_score_slider', + 'face_mask_types_checkbox_group', + 'face_mask_blur_slider', + 'face_mask_padding_top_slider', + 'face_mask_padding_bottom_slider', + 'face_mask_padding_left_slider', + 'face_mask_padding_right_slider', + 
'face_mask_region_checkbox_group', + 'frame_processors_checkbox_group', + 'face_debugger_items_checkbox_group', + 'face_enhancer_model_dropdown', + 'face_enhancer_blend_slider', + 'face_swapper_model_dropdown', + 'frame_colorizer_model_dropdown', + 'frame_colorizer_blend_slider', + 'frame_colorizer_size_dropdown', + 'frame_enhancer_model_dropdown', + 'frame_enhancer_blend_slider', + 'lip_syncer_model_dropdown', + 'output_path_textbox', + 'output_video_fps_slider', + 'benchmark_runs_checkbox_group', + 'benchmark_cycles_slider', + 'webcam_mode_radio', + 'webcam_resolution_dropdown', + 'webcam_fps_slider' +] + +WebcamMode = Literal['inline', 'udp', 'v4l2'] +StreamMode = Literal['udp', 'v4l2'] diff --git a/facefusion/vision.py b/facefusion/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..6f9dc3cd7da6cc899970b7d07f67b70b996ecd8b --- /dev/null +++ b/facefusion/vision.py @@ -0,0 +1,218 @@ +from typing import Optional, List, Tuple +from functools import lru_cache +import cv2 +import numpy +from cv2.typing import Size + +from facefusion.typing import VisionFrame, Resolution, Fps +from facefusion.choices import image_template_sizes, video_template_sizes +from facefusion.filesystem import is_image, is_video + + +@lru_cache(maxsize = 128) +def read_static_image(image_path : str) -> Optional[VisionFrame]: + return read_image(image_path) + + +def read_static_images(image_paths : List[str]) -> Optional[List[VisionFrame]]: + frames = [] + if image_paths: + for image_path in image_paths: + frames.append(read_static_image(image_path)) + return frames + + +def read_image(image_path : str) -> Optional[VisionFrame]: + if is_image(image_path): + return cv2.imread(image_path) + return None + + +def write_image(image_path : str, vision_frame : VisionFrame) -> bool: + if image_path: + return cv2.imwrite(image_path, vision_frame) + return False + + +def detect_image_resolution(image_path : str) -> Optional[Resolution]: + if is_image(image_path): + image = read_image(image_path) + height, width = image.shape[:2] + return width, height + return None + + +def restrict_image_resolution(image_path : str, resolution : Resolution) -> Resolution: + if is_image(image_path): + image_resolution = detect_image_resolution(image_path) + if image_resolution < resolution: + return image_resolution + return resolution + + +def get_video_frame(video_path : str, frame_number : int = 0) -> Optional[VisionFrame]: + if is_video(video_path): + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + frame_total = video_capture.get(cv2.CAP_PROP_FRAME_COUNT) + video_capture.set(cv2.CAP_PROP_POS_FRAMES, min(frame_total, frame_number - 1)) + has_vision_frame, vision_frame = video_capture.read() + video_capture.release() + if has_vision_frame: + return vision_frame + return None + + +def create_image_resolutions(resolution : Resolution) -> List[str]: + resolutions = [] + temp_resolutions = [] + + if resolution: + width, height = resolution + temp_resolutions.append(normalize_resolution(resolution)) + for template_size in image_template_sizes: + temp_resolutions.append(normalize_resolution((width * template_size, height * template_size))) + temp_resolutions = sorted(set(temp_resolutions)) + for temp_resolution in temp_resolutions: + resolutions.append(pack_resolution(temp_resolution)) + return resolutions + + +def count_video_frame_total(video_path : str) -> int: + if is_video(video_path): + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + video_frame_total = 
int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) + video_capture.release() + return video_frame_total + return 0 + + +def detect_video_fps(video_path : str) -> Optional[float]: + if is_video(video_path): + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + video_fps = video_capture.get(cv2.CAP_PROP_FPS) + video_capture.release() + return video_fps + return None + + +def restrict_video_fps(video_path : str, fps : Fps) -> Fps: + if is_video(video_path): + video_fps = detect_video_fps(video_path) + if video_fps < fps: + return video_fps + return fps + + +def detect_video_resolution(video_path : str) -> Optional[Resolution]: + if is_video(video_path): + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH) + height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT) + video_capture.release() + return int(width), int(height) + return None + + +def restrict_video_resolution(video_path : str, resolution : Resolution) -> Resolution: + if is_video(video_path): + video_resolution = detect_video_resolution(video_path) + if video_resolution < resolution: + return video_resolution + return resolution + + +def create_video_resolutions(resolution : Resolution) -> List[str]: + resolutions = [] + temp_resolutions = [] + + if resolution: + width, height = resolution + temp_resolutions.append(normalize_resolution(resolution)) + for template_size in video_template_sizes: + if width > height: + temp_resolutions.append(normalize_resolution((template_size * width / height, template_size))) + else: + temp_resolutions.append(normalize_resolution((template_size, template_size * height / width))) + temp_resolutions = sorted(set(temp_resolutions)) + for temp_resolution in temp_resolutions: + resolutions.append(pack_resolution(temp_resolution)) + return resolutions + + +def normalize_resolution(resolution : Tuple[float, float]) -> Resolution: + width, height = resolution + + if width and height: + normalize_width = round(width / 2) * 2 + normalize_height = round(height / 2) * 2 + return normalize_width, normalize_height + return 0, 0 + + +def pack_resolution(resolution : Resolution) -> str: + width, height = normalize_resolution(resolution) + return str(width) + 'x' + str(height) + + +def unpack_resolution(resolution : str) -> Resolution: + width, height = map(int, resolution.split('x')) + return width, height + + +def resize_frame_resolution(vision_frame : VisionFrame, max_resolution : Resolution) -> VisionFrame: + height, width = vision_frame.shape[:2] + max_width, max_height = max_resolution + + if height > max_height or width > max_width: + scale = min(max_height / height, max_width / width) + new_width = int(width * scale) + new_height = int(height * scale) + return cv2.resize(vision_frame, (new_width, new_height)) + return vision_frame + + +def normalize_frame_color(vision_frame : VisionFrame) -> VisionFrame: + return cv2.cvtColor(vision_frame, cv2.COLOR_BGR2RGB) + + +def create_tile_frames(vision_frame : VisionFrame, size : Size) -> Tuple[List[VisionFrame], int, int]: + vision_frame = numpy.pad(vision_frame, ((size[1], size[1]), (size[1], size[1]), (0, 0))) + tile_width = size[0] - 2 * size[2] + pad_size_bottom = size[2] + tile_width - vision_frame.shape[0] % tile_width + pad_size_right = size[2] + tile_width - vision_frame.shape[1] % tile_width + pad_vision_frame = numpy.pad(vision_frame, ((size[2], pad_size_bottom), (size[2], pad_size_right), (0, 0))) + pad_height, pad_width = pad_vision_frame.shape[:2] + 
row_range = range(size[2], pad_height - size[2], tile_width) + col_range = range(size[2], pad_width - size[2], tile_width) + tile_vision_frames = [] + + for row_vision_frame in row_range: + top = row_vision_frame - size[2] + bottom = row_vision_frame + size[2] + tile_width + for column_vision_frame in col_range: + left = column_vision_frame - size[2] + right = column_vision_frame + size[2] + tile_width + tile_vision_frames.append(pad_vision_frame[top:bottom, left:right, :]) + return tile_vision_frames, pad_width, pad_height + + +def merge_tile_frames(tile_vision_frames : List[VisionFrame], temp_width : int, temp_height : int, pad_width : int, pad_height : int, size : Size) -> VisionFrame: + merge_vision_frame = numpy.zeros((pad_height, pad_width, 3)).astype(numpy.uint8) + tile_width = tile_vision_frames[0].shape[1] - 2 * size[2] + tiles_per_row = min(pad_width // tile_width, len(tile_vision_frames)) + + for index, tile_vision_frame in enumerate(tile_vision_frames): + tile_vision_frame = tile_vision_frame[size[2]:-size[2], size[2]:-size[2]] + row_index = index // tiles_per_row + col_index = index % tiles_per_row + top = row_index * tile_vision_frame.shape[0] + bottom = top + tile_vision_frame.shape[0] + left = col_index * tile_vision_frame.shape[1] + right = left + tile_vision_frame.shape[1] + merge_vision_frame[top:bottom, left:right, :] = tile_vision_frame + merge_vision_frame = merge_vision_frame[size[1] : size[1] + temp_height, size[1]: size[1] + temp_width, :] + return merge_vision_frame diff --git a/facefusion/voice_extractor.py b/facefusion/voice_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..0208aeb5b35565c2aa1f0c5c73e622bfaeaf329b --- /dev/null +++ b/facefusion/voice_extractor.py @@ -0,0 +1,129 @@ +from typing import Any, Tuple +from time import sleep +import scipy +import numpy +import onnxruntime + +import facefusion.globals +from facefusion import process_manager +from facefusion.thread_helper import thread_lock, thread_semaphore +from facefusion.typing import ModelSet, AudioChunk, Audio +from facefusion.execution import apply_execution_provider_options +from facefusion.filesystem import resolve_relative_path, is_file +from facefusion.download import conditional_download + +VOICE_EXTRACTOR = None +MODELS : ModelSet =\ +{ + 'voice_extractor': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/voice_extractor.onnx', + 'path': resolve_relative_path('../.assets/models/voice_extractor.onnx') + } +} + + +def get_voice_extractor() -> Any: + global VOICE_EXTRACTOR + + with thread_lock(): + while process_manager.is_checking(): + sleep(0.5) + if VOICE_EXTRACTOR is None: + model_path = MODELS.get('voice_extractor').get('path') + VOICE_EXTRACTOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return VOICE_EXTRACTOR + + +def clear_voice_extractor() -> None: + global VOICE_EXTRACTOR + + VOICE_EXTRACTOR = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = MODELS.get('voice_extractor').get('url') + model_path = MODELS.get('voice_extractor').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def batch_extract_voice(audio : Audio, chunk_size : int, step_size : int) -> Audio: + temp_audio = numpy.zeros((audio.shape[0], 
2)).astype(numpy.float32) + temp_chunk = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) + + for start in range(0, audio.shape[0], step_size): + end = min(start + chunk_size, audio.shape[0]) + temp_audio[start:end, ...] += extract_voice(audio[start:end, ...]) + temp_chunk[start:end, ...] += 1 + audio = temp_audio / temp_chunk + return audio + + +def extract_voice(temp_audio_chunk : AudioChunk) -> AudioChunk: + voice_extractor = get_voice_extractor() + chunk_size = 1024 * (voice_extractor.get_inputs()[0].shape[3] - 1) + trim_size = 3840 + temp_audio_chunk, pad_size = prepare_audio_chunk(temp_audio_chunk.T, chunk_size, trim_size) + temp_audio_chunk = decompose_audio_chunk(temp_audio_chunk, trim_size) + with thread_semaphore(): + temp_audio_chunk = voice_extractor.run(None, + { + voice_extractor.get_inputs()[0].name: temp_audio_chunk + })[0] + temp_audio_chunk = compose_audio_chunk(temp_audio_chunk, trim_size) + temp_audio_chunk = normalize_audio_chunk(temp_audio_chunk, chunk_size, trim_size, pad_size) + return temp_audio_chunk + + +def prepare_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, trim_size : int) -> Tuple[AudioChunk, int]: + step_size = chunk_size - 2 * trim_size + pad_size = step_size - temp_audio_chunk.shape[1] % step_size + audio_chunk_size = temp_audio_chunk.shape[1] + pad_size + temp_audio_chunk = temp_audio_chunk.astype(numpy.float32) / numpy.iinfo(numpy.int16).max + temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (trim_size, trim_size + pad_size))) + temp_audio_chunks = [] + + for index in range(0, audio_chunk_size, step_size): + temp_audio_chunks.append(temp_audio_chunk[:, index:index + chunk_size]) + temp_audio_chunk = numpy.concatenate(temp_audio_chunks, axis = 0) + temp_audio_chunk = temp_audio_chunk.reshape((-1, chunk_size)) + return temp_audio_chunk, pad_size + + +def decompose_audio_chunk(temp_audio_chunk : AudioChunk, trim_size : int) -> AudioChunk: + frame_size = 7680 + frame_overlap = 6656 + voice_extractor_shape = get_voice_extractor().get_inputs()[0].shape + window = scipy.signal.windows.hann(frame_size) + temp_audio_chunk = scipy.signal.stft(temp_audio_chunk, nperseg = frame_size, noverlap = frame_overlap, window = window)[2] + temp_audio_chunk = numpy.stack((numpy.real(temp_audio_chunk), numpy.imag(temp_audio_chunk)), axis = -1).transpose((0, 3, 1, 2)) + temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, 2, trim_size + 1, voice_extractor_shape[3]).reshape(-1, voice_extractor_shape[1], trim_size + 1, voice_extractor_shape[3]) + temp_audio_chunk = temp_audio_chunk[:, :, :voice_extractor_shape[2]] + temp_audio_chunk /= numpy.sqrt(1.0 / window.sum() ** 2) + return temp_audio_chunk + + +def compose_audio_chunk(temp_audio_chunk : AudioChunk, trim_size : int) -> AudioChunk: + frame_size = 7680 + frame_overlap = 6656 + voice_extractor_shape = get_voice_extractor().get_inputs()[0].shape + window = scipy.signal.windows.hann(frame_size) + temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (0, 0), (0, trim_size + 1 - voice_extractor_shape[2]), (0, 0))) + temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, trim_size + 1, voice_extractor_shape[3]).transpose((0, 2, 3, 1)) + temp_audio_chunk = temp_audio_chunk[:, :, :, 0] + 1j * temp_audio_chunk[:, :, :, 1] + temp_audio_chunk = scipy.signal.istft(temp_audio_chunk, nperseg = frame_size, noverlap = frame_overlap, window = window)[1] + temp_audio_chunk *= numpy.sqrt(1.0 / window.sum() ** 2) + return temp_audio_chunk + + +def normalize_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, 
trim_size : int, pad_size : int) -> AudioChunk: + temp_audio_chunk = temp_audio_chunk.reshape((-1, 2, chunk_size)) + temp_audio_chunk = temp_audio_chunk[:, :, trim_size:-trim_size].transpose(1, 0, 2) + temp_audio_chunk = temp_audio_chunk.reshape(2, -1)[:, :-pad_size].T + return temp_audio_chunk diff --git a/facefusion/wording.py b/facefusion/wording.py new file mode 100644 index 0000000000000000000000000000000000000000..c5da7a696f2744765c6122b323eedf397efc617d --- /dev/null +++ b/facefusion/wording.py @@ -0,0 +1,217 @@ +from typing import Any, Dict, Optional + +WORDING : Dict[str, Any] =\ +{ + 'conda_not_activated': 'Conda is not activated', + 'python_not_supported': 'Python version is not supported, upgrade to {version} or higher', + 'ffmpeg_not_installed': 'FFMpeg is not installed', + 'creating_temp': 'Creating temporary resources', + 'extracting_frames': 'Extracting frames with a resolution of {resolution} and {fps} frames per second', + 'extracting_frames_succeed': 'Extracting frames succeed', + 'extracting_frames_failed': 'Extracting frames failed', + 'analysing': 'Analysing', + 'processing': 'Processing', + 'downloading': 'Downloading', + 'temp_frames_not_found': 'Temporary frames not found', + 'copying_image': 'Copying image with a resolution of {resolution}', + 'copying_image_succeed': 'Copying image succeed', + 'copying_image_failed': 'Copying image failed', + 'finalizing_image': 'Finalizing image with a resolution of {resolution}', + 'finalizing_image_succeed': 'Finalizing image succeed', + 'finalizing_image_skipped': 'Finalizing image skipped', + 'merging_video': 'Merging video with a resolution of {resolution} and {fps} frames per second', + 'merging_video_succeed': 'Merging video succeed', + 'merging_video_failed': 'Merging video failed', + 'skipping_audio': 'Skipping audio', + 'restoring_audio_succeed': 'Restoring audio succeed', + 'restoring_audio_skipped': 'Restoring audio skipped', + 'clearing_temp': 'Clearing temporary resources', + 'processing_stopped': 'Processing stopped', + 'processing_image_succeed': 'Processing to image succeed in {seconds} seconds', + 'processing_image_failed': 'Processing to image failed', + 'processing_video_succeed': 'Processing to video succeed in {seconds} seconds', + 'processing_video_failed': 'Processing to video failed', + 'model_download_not_done': 'Download of the model is not done', + 'model_file_not_present': 'File of the model is not present', + 'select_image_source': 'Select a image for source path', + 'select_audio_source': 'Select a audio for source path', + 'select_video_target': 'Select a video for target path', + 'select_image_or_video_target': 'Select a image or video for target path', + 'select_file_or_directory_output': 'Select a file or directory for output path', + 'no_source_face_detected': 'No source face detected', + 'frame_processor_not_loaded': 'Frame processor {frame_processor} could not be loaded', + 'frame_processor_not_implemented': 'Frame processor {frame_processor} not implemented correctly', + 'ui_layout_not_loaded': 'UI layout {ui_layout} could not be loaded', + 'ui_layout_not_implemented': 'UI layout {ui_layout} not implemented correctly', + 'stream_not_loaded': 'Stream {stream_mode} could not be loaded', + 'point': '.', + 'comma': ',', + 'colon': ':', + 'question_mark': '?', + 'exclamation_mark': '!', + 'help': + { + # installer + 'install_dependency': 'select the variant of {dependency} to install', + 'skip_conda': 'skip the conda environment check', + # general + 'source': 'choose single or multiple source 
images or audios', + 'target': 'choose single target image or video', + 'output': 'specify the output file or directory', + # misc + 'force_download': 'force automate downloads and exit', + 'skip_download': 'omit automate downloads and remote lookups', + 'headless': 'run the program without a user interface', + 'log_level': 'adjust the message severity displayed in the terminal', + # execution + 'execution_providers': 'accelerate the model inference using different providers (choices: {choices}, ...)', + 'execution_thread_count': 'specify the amount of parallel threads while processing', + 'execution_queue_count': 'specify the amount of frames each thread is processing', + # memory + 'video_memory_strategy': 'balance fast frame processing and low VRAM usage', + 'system_memory_limit': 'limit the available RAM that can be used while processing', + # face analyser + 'face_analyser_order': 'specify the order in which the face analyser detects faces', + 'face_analyser_age': 'filter the detected faces based on their age', + 'face_analyser_gender': 'filter the detected faces based on their gender', + 'face_detector_model': 'choose the model responsible for detecting the face', + 'face_detector_size': 'specify the size of the frame provided to the face detector', + 'face_detector_score': 'filter the detected faces based on the confidence score', + 'face_landmarker_score': 'filter the detected landmarks based on the confidence score', + # face selector + 'face_selector_mode': 'use reference based tracking or simple matching', + 'reference_face_position': 'specify the position used to create the reference face', + 'reference_face_distance': 'specify the desired similarity between the reference face and target face', + 'reference_frame_number': 'specify the frame used to create the reference face', + # face mask + 'face_mask_types': 'mix and match different face mask types (choices: {choices})', + 'face_mask_blur': 'specify the degree of blur applied to the box mask', + 'face_mask_padding': 'apply top, right, bottom and left padding to the box mask', + 'face_mask_regions': 'choose the facial features used for the region mask (choices: {choices})', + # frame extraction + 'trim_frame_start': 'specify the start frame of the target video', + 'trim_frame_end': 'specify the end frame of the target video', + 'temp_frame_format': 'specify the temporary resources format', + 'keep_temp': 'keep the temporary resources after processing', + # output creation + 'output_image_quality': 'specify the image quality which translates to the compression factor', + 'output_image_resolution': 'specify the image output resolution based on the target image', + 'output_video_encoder': 'specify the encoder used for the video compression', + 'output_video_preset': 'balance fast video processing and video file size', + 'output_video_quality': 'specify the video quality which translates to the compression factor', + 'output_video_resolution': 'specify the video output resolution based on the target video', + 'output_video_fps': 'specify the video output fps based on the target video', + 'skip_audio': 'omit the audio from the target video', + # frame processors + 'frame_processors': 'load a single or multiple frame processors. 
(choices: {choices}, ...)', + 'face_debugger_items': 'load a single or multiple frame processors (choices: {choices})', + 'face_enhancer_model': 'choose the model responsible for enhancing the face', + 'face_enhancer_blend': 'blend the enhanced into the previous face', + 'face_swapper_model': 'choose the model responsible for swapping the face', + 'frame_colorizer_model': 'choose the model responsible for colorizing the frame', + 'frame_colorizer_blend': 'blend the colorized into the previous frame', + 'frame_colorizer_size': 'specify the size of the frame provided to the frame colorizer', + 'frame_enhancer_model': 'choose the model responsible for enhancing the frame', + 'frame_enhancer_blend': 'blend the enhanced into the previous frame', + 'lip_syncer_model': 'choose the model responsible for syncing the lips', + # uis + 'ui_layouts': 'launch a single or multiple UI layouts (choices: {choices}, ...)' + }, + 'uis': + { + # general + 'start_button': 'START', + 'stop_button': 'STOP', + 'clear_button': 'CLEAR', + # about + 'donate_button': 'DONATE', + # benchmark + 'benchmark_results_dataframe': 'BENCHMARK RESULTS', + # benchmark options + 'benchmark_runs_checkbox_group': 'BENCHMARK RUNS', + 'benchmark_cycles_slider': 'BENCHMARK CYCLES', + # common options + 'common_options_checkbox_group': 'OPTIONS', + # execution + 'execution_providers_checkbox_group': 'EXECUTION PROVIDERS', + # execution queue count + 'execution_queue_count_slider': 'EXECUTION QUEUE COUNT', + # execution thread count + 'execution_thread_count_slider': 'EXECUTION THREAD COUNT', + # face analyser + 'face_analyser_order_dropdown': 'FACE ANALYSER ORDER', + 'face_analyser_age_dropdown': 'FACE ANALYSER AGE', + 'face_analyser_gender_dropdown': 'FACE ANALYSER GENDER', + 'face_detector_model_dropdown': 'FACE DETECTOR MODEL', + 'face_detector_size_dropdown': 'FACE DETECTOR SIZE', + 'face_detector_score_slider': 'FACE DETECTOR SCORE', + 'face_landmarker_score_slider': 'FACE LANDMARKER SCORE', + # face masker + 'face_mask_types_checkbox_group': 'FACE MASK TYPES', + 'face_mask_blur_slider': 'FACE MASK BLUR', + 'face_mask_padding_top_slider': 'FACE MASK PADDING TOP', + 'face_mask_padding_right_slider': 'FACE MASK PADDING RIGHT', + 'face_mask_padding_bottom_slider': 'FACE MASK PADDING BOTTOM', + 'face_mask_padding_left_slider': 'FACE MASK PADDING LEFT', + 'face_mask_region_checkbox_group': 'FACE MASK REGIONS', + # face selector + 'face_selector_mode_dropdown': 'FACE SELECTOR MODE', + 'reference_face_gallery': 'REFERENCE FACE', + 'reference_face_distance_slider': 'REFERENCE FACE DISTANCE', + # frame processors + 'frame_processors_checkbox_group': 'FRAME PROCESSORS', + # frame processors options + 'face_debugger_items_checkbox_group': 'FACE DEBUGGER ITEMS', + 'face_enhancer_model_dropdown': 'FACE ENHANCER MODEL', + 'face_enhancer_blend_slider': 'FACE ENHANCER BLEND', + 'face_swapper_model_dropdown': 'FACE SWAPPER MODEL', + 'frame_colorizer_model_dropdown': 'FRAME COLORIZER MODEL', + 'frame_colorizer_blend_slider': 'FRAME COLORIZER BLEND', + 'frame_colorizer_size_dropdown': 'FRAME COLORIZER SIZE', + 'frame_enhancer_model_dropdown': 'FRAME ENHANCER MODEL', + 'frame_enhancer_blend_slider': 'FRAME ENHANCER BLEND', + 'lip_syncer_model_dropdown': 'LIP SYNCER MODEL', + # memory + 'video_memory_strategy_dropdown': 'VIDEO MEMORY STRATEGY', + 'system_memory_limit_slider': 'SYSTEM MEMORY LIMIT', + # output + 'output_image_or_video': 'OUTPUT', + # output options + 'output_path_textbox': 'OUTPUT PATH', + 'output_image_quality_slider': 'OUTPUT IMAGE 
QUALITY', + 'output_image_resolution_dropdown': 'OUTPUT IMAGE RESOLUTION', + 'output_video_encoder_dropdown': 'OUTPUT VIDEO ENCODER', + 'output_video_preset_dropdown': 'OUTPUT VIDEO PRESET', + 'output_video_quality_slider': 'OUTPUT VIDEO QUALITY', + 'output_video_resolution_dropdown': 'OUTPUT VIDEO RESOLUTION', + 'output_video_fps_slider': 'OUTPUT VIDEO FPS', + # preview + 'preview_image': 'PREVIEW', + 'preview_frame_slider': 'PREVIEW FRAME', + # source + 'source_file': 'SOURCE', + # target + 'target_file': 'TARGET', + # temp frame + 'temp_frame_format_dropdown': 'TEMP FRAME FORMAT', + # trim frame + 'trim_frame_start_slider': 'TRIM FRAME START', + 'trim_frame_end_slider': 'TRIM FRAME END', + # webcam + 'webcam_image': 'WEBCAM', + # webcam options + 'webcam_mode_radio': 'WEBCAM MODE', + 'webcam_resolution_dropdown': 'WEBCAM RESOLUTION', + 'webcam_fps_slider': 'WEBCAM FPS' + } +} + + +def get(key : str) -> Optional[str]: + if '.' in key: + section, name = key.split('.') + if section in WORDING and name in WORDING[section]: + return WORDING[section][name] + if key in WORDING: + return WORDING[key] + return None diff --git a/install.py b/install.py new file mode 100644 index 0000000000000000000000000000000000000000..6feca23e90646a0c741864a8d45e11201b1748c0 --- /dev/null +++ b/install.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 + +import os +import subprocess + +os.environ['PIP_BREAK_SYSTEM_PACKAGES'] = '1' +subprocess.call([ 'pip', 'install', 'inquirer', '-q' ]) + +from facefusion import installer + +if __name__ == '__main__': + installer.cli() diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000000000000000000000000000000000000..64218bc23688632a08c98ec4a0451ed46f8ed5e5 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,7 @@ +[mypy] +check_untyped_defs = True +disallow_any_generics = True +disallow_untyped_calls = True +disallow_untyped_defs = True +ignore_missing_imports = True +strict_optional = False diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8787cd4f4f85d9beedc2d95a6044bedef34a15fa --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +filetype==1.2.0 +gradio==3.50.2 +numpy==1.26.4 +onnx==1.16.0 +onnxruntime==1.17.1 +opencv-python==4.8.1.78 +psutil==5.9.8 +tqdm==4.66.2 +scipy==1.12.0 diff --git a/run.py b/run.py new file mode 100644 index 0000000000000000000000000000000000000000..1c2a8bd9cffb4e9132f733337d798335f472e4c6 --- /dev/null +++ b/run.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from facefusion import core + +if __name__ == '__main__': + core.cli() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/test_audio.py b/tests/test_audio.py new file mode 100644 index 0000000000000000000000000000000000000000..765acfb8f7097b54c790a9757cebcc4a5cbe474a --- /dev/null +++ b/tests/test_audio.py @@ -0,0 +1,26 @@ +import subprocess +import pytest + +from facefusion.audio import get_audio_frame, read_static_audio +from facefusion.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.mp3', '.assets/examples/source.wav' ]) + + +def test_get_audio_frame() -> None: + assert get_audio_frame('.assets/examples/source.mp3', 25) is not 
None + assert get_audio_frame('.assets/examples/source.wav', 25) is not None + assert get_audio_frame('invalid', 25) is None + + +def test_read_static_audio() -> None: + assert len(read_static_audio('.assets/examples/source.mp3', 25)) == 280 + assert len(read_static_audio('.assets/examples/source.wav', 25)) == 280 + assert read_static_audio('invalid', 25) is None diff --git a/tests/test_cli_face_debugger.py b/tests/test_cli_face_debugger.py new file mode 100644 index 0000000000000000000000000000000000000000..51167af772210fcb67bcab96617cdf62315f055e --- /dev/null +++ b/tests/test_cli_face_debugger.py @@ -0,0 +1,31 @@ +import subprocess +import sys +import pytest + +from facefusion.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '.assets/examples/target-240p.jpg' ]) + + +def test_debug_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_debug_face_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_debug_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_debug_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() diff --git a/tests/test_cli_face_enhancer.py b/tests/test_cli_face_enhancer.py new file mode 100644 index 0000000000000000000000000000000000000000..ccedf7d54e3ff92b75f45320477e818dec9a1562 --- /dev/null +++ b/tests/test_cli_face_enhancer.py @@ -0,0 +1,32 @@ +import subprocess +import sys +import pytest + +from facefusion.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '.assets/examples/target-240p.jpg' ]) + + +def test_enhance_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_enhance_face_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_enhance_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_enhance_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = 
subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() + diff --git a/tests/test_cli_face_swapper.py b/tests/test_cli_face_swapper.py new file mode 100644 index 0000000000000000000000000000000000000000..399870ab78ba75708d05ed2af324fb1d6bf3cac0 --- /dev/null +++ b/tests/test_cli_face_swapper.py @@ -0,0 +1,31 @@ +import subprocess +import sys +import pytest + +from facefusion.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '.assets/examples/target-240p.jpg' ]) + + +def test_swap_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '.assets/examples/source.jpg', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_swap_face_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_swap_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '.assets/examples/source.jpg', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_swap_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() diff --git a/tests/test_cli_frame_colorizer.py b/tests/test_cli_frame_colorizer.py new file mode 100644 index 0000000000000000000000000000000000000000..97ff08ec797ca0c53e496f3b82633b22b5409dad --- /dev/null +++ b/tests/test_cli_frame_colorizer.py @@ -0,0 +1,32 @@ +import subprocess +import sys +import pytest + +from facefusion.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '-vf', 'hue=s=0', '.assets/examples/target-240p-0sat.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'hue=s=0', '.assets/examples/target-240p-0sat.mp4' ]) + + +def test_colorize_frame_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '.assets/examples/target-240p-0sat.jpg', '-o', '.assets/examples/test_colorize_frame_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_colorize_frame_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '.assets/examples/target-240p-0sat.mp4', '-o', '.assets/examples/test_colorize_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + 
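# a successful run logs 'Processing to video succeed in {seconds} seconds', which the substring assertion below relies on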
+ assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() diff --git a/tests/test_cli_frame_enhancer.py b/tests/test_cli_frame_enhancer.py new file mode 100644 index 0000000000000000000000000000000000000000..89399f4638ca76ca01bc5d01f7cd90d8d120f0cb --- /dev/null +++ b/tests/test_cli_frame_enhancer.py @@ -0,0 +1,31 @@ +import subprocess +import sys +import pytest + +from facefusion.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '.assets/examples/target-240p.jpg' ]) + + +def test_enhance_frame_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_enhance_frame_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_enhance_frame_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_enhance_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() diff --git a/tests/test_cli_lip_syncer.py b/tests/test_cli_lip_syncer.py new file mode 100644 index 0000000000000000000000000000000000000000..089ff41a005ea39e29e8aa0b593c10402e828b10 --- /dev/null +++ b/tests/test_cli_lip_syncer.py @@ -0,0 +1,32 @@ +import subprocess +import sys +import pytest + +from facefusion.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '.assets/examples/target-240p.jpg' ]) + + +def test_sync_lip_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'lip_syncer', '-s', '.assets/examples/source.mp3', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_sync_lip_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_sync_lip_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'lip_syncer', '-s', '.assets/examples/source.mp3', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_sync_lip_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() diff --git a/tests/test_common_helper.py 
b/tests/test_common_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..145cc69f4363d495401d4cff2b9a0ec4dfaa9b1c --- /dev/null +++ b/tests/test_common_helper.py @@ -0,0 +1,16 @@ +from facefusion.common_helper import create_metavar, create_int_range, create_float_range + + +def test_create_metavar() -> None: + assert create_metavar([ 1, 2, 3, 4, 5 ]) == '[1-5]' + + +def test_create_int_range() -> None: + assert create_int_range(0, 2, 1) == [ 0, 1, 2 ] + assert create_float_range(0, 1, 1) == [ 0, 1 ] + + +def test_create_float_range() -> None: + assert create_float_range(0.0, 1.0, 0.5) == [ 0.0, 0.5, 1.0 ] + assert create_float_range(0.0, 0.2, 0.05) == [ 0.0, 0.05, 0.10, 0.15, 0.20 ] + diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000000000000000000000000000000000000..8c830fd61e849fc8b07f579ae6932b08085de009 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,96 @@ +from configparser import ConfigParser +import pytest + +from facefusion import config + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + config.CONFIG = ConfigParser() + config.CONFIG.read_dict( + { + 'str': + { + 'valid': 'a', + 'unset': '' + }, + 'int': + { + 'valid': '1', + 'unset': '' + }, + 'float': + { + 'valid': '1.0', + 'unset': '' + }, + 'bool': + { + 'valid': 'True', + 'unset': '' + }, + 'str_list': + { + 'valid': 'a b c', + 'unset': '' + }, + 'int_list': + { + 'valid': '1 2 3', + 'unset': '' + }, + 'float_list': + { + 'valid': '1.0 2.0 3.0', + 'unset': '' + } + }) + + +def test_get_str_value() -> None: + assert config.get_str_value('str.valid') == 'a' + assert config.get_str_value('str.unset', 'b') == 'b' + assert config.get_str_value('str.unset') is None + assert config.get_str_value('str.invalid') is None + + +def test_get_int_value() -> None: + assert config.get_int_value('int.valid') == 1 + assert config.get_int_value('int.unset', '1') == 1 + assert config.get_int_value('int.unset') is None + assert config.get_int_value('int.invalid') is None + + +def test_get_float_value() -> None: + assert config.get_float_value('float.valid') == 1.0 + assert config.get_float_value('float.unset', '1.0') == 1.0 + assert config.get_float_value('float.unset') is None + assert config.get_float_value('float.invalid') is None + + +def test_get_bool_value() -> None: + assert config.get_bool_value('bool.valid') is True + assert config.get_bool_value('bool.unset', 'False') is False + assert config.get_bool_value('bool.unset') is None + assert config.get_bool_value('bool.invalid') is None + + +def test_get_str_list() -> None: + assert config.get_str_list('str_list.valid') == [ 'a', 'b', 'c' ] + assert config.get_str_list('str_list.unset', 'c b a') == [ 'c', 'b', 'a' ] + assert config.get_str_list('str_list.unset') is None + assert config.get_str_list('str_list.invalid') is None + + +def test_get_int_list() -> None: + assert config.get_int_list('int_list.valid') == [ 1, 2, 3 ] + assert config.get_int_list('int_list.unset', '3 2 1') == [ 3, 2, 1 ] + assert config.get_int_list('int_list.unset') is None + assert config.get_int_list('int_list.invalid') is None + + +def test_get_float_list() -> None: + assert config.get_float_list('float_list.valid') == [ 1.0, 2.0, 3.0 ] + assert config.get_float_list('float_list.unset', '3.0 2.0 1.0') == [ 3.0, 2.0, 1.0 ] + assert config.get_float_list('float_list.unset') is None + assert config.get_float_list('float_list.invalid') is None diff --git a/tests/test_download.py b/tests/test_download.py new file 
mode 100644 index 0000000000000000000000000000000000000000..6df94b171221f0fc078348413de4c4886c254faf --- /dev/null +++ b/tests/test_download.py @@ -0,0 +1,23 @@ +import pytest + +from facefusion.download import conditional_download, get_download_size, is_download_done + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + + +def test_get_download_size() -> None: + assert get_download_size('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4') == 191675 + assert get_download_size('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-360p.mp4') == 370732 + assert get_download_size('invalid') == 0 + + +def test_is_download_done() -> None: + assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', '.assets/examples/target-240p.mp4') is True + assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', 'invalid') is False + assert is_download_done('invalid', 'invalid') is False diff --git a/tests/test_execution.py b/tests/test_execution.py new file mode 100644 index 0000000000000000000000000000000000000000..a847f4248e82e70723e2d1427bb5332784fc5734 --- /dev/null +++ b/tests/test_execution.py @@ -0,0 +1,21 @@ +from facefusion.execution import encode_execution_providers, decode_execution_providers, apply_execution_provider_options + + +def test_encode_execution_providers() -> None: + assert encode_execution_providers([ 'CPUExecutionProvider' ]) == [ 'cpu' ] + + +def test_decode_execution_providers() -> None: + assert decode_execution_providers([ 'cpu' ]) == [ 'CPUExecutionProvider' ] + + +def test_multiple_execution_providers() -> None: + execution_provider_with_options =\ + [ + 'CPUExecutionProvider', + ('CUDAExecutionProvider', + { + 'cudnn_conv_algo_search': 'DEFAULT' + }) + ] + assert apply_execution_provider_options([ 'CPUExecutionProvider', 'CUDAExecutionProvider' ]) == execution_provider_with_options diff --git a/tests/test_face_analyser.py b/tests/test_face_analyser.py new file mode 100644 index 0000000000000000000000000000000000000000..957dfc8d8871f4236ef19b41c3ab05ab0511b926 --- /dev/null +++ b/tests/test_face_analyser.py @@ -0,0 +1,103 @@ +import subprocess +import pytest + +import facefusion.globals +from facefusion.download import conditional_download +from facefusion.face_analyser import pre_check, clear_face_analyser, get_one_face +from facefusion.typing import Face +from facefusion.vision import read_static_image + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.jpg', '-vf', 'crop=iw*0.8:ih*0.8', '.assets/examples/source-80crop.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.jpg', '-vf', 'crop=iw*0.7:ih*0.7', '.assets/examples/source-70crop.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.jpg', '-vf', 'crop=iw*0.6:ih*0.6', '.assets/examples/source-60crop.jpg' ]) + + +@pytest.fixture(autouse = True) +def before_each() -> None: + facefusion.globals.face_detector_score = 0.5 + facefusion.globals.face_landmarker_score = 0.5 + 
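# each test below selects its own detector model and size, so the cached analyser is cleared before every run +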
facefusion.globals.face_recognizer_model = 'arcface_inswapper' + clear_face_analyser() + + +def test_get_one_face_with_retinaface() -> None: + facefusion.globals.face_detector_model = 'retinaface' + facefusion.globals.face_detector_size = '320x320' + + pre_check() + source_paths =\ + [ + '.assets/examples/source.jpg', + '.assets/examples/source-80crop.jpg', + '.assets/examples/source-70crop.jpg', + '.assets/examples/source-60crop.jpg' + ] + for source_path in source_paths: + source_frame = read_static_image(source_path) + face = get_one_face(source_frame) + + assert isinstance(face, Face) + + +def test_get_one_face_with_scrfd() -> None: + facefusion.globals.face_detector_model = 'scrfd' + facefusion.globals.face_detector_size = '640x640' + + pre_check() + source_paths =\ + [ + '.assets/examples/source.jpg', + '.assets/examples/source-80crop.jpg', + '.assets/examples/source-70crop.jpg', + '.assets/examples/source-60crop.jpg' + ] + for source_path in source_paths: + source_frame = read_static_image(source_path) + face = get_one_face(source_frame) + + assert isinstance(face, Face) + + +def test_get_one_face_with_yoloface() -> None: + facefusion.globals.face_detector_model = 'yoloface' + facefusion.globals.face_detector_size = '640x640' + + pre_check() + source_paths =\ + [ + '.assets/examples/source.jpg', + '.assets/examples/source-80crop.jpg', + '.assets/examples/source-70crop.jpg', + '.assets/examples/source-60crop.jpg' + ] + for source_path in source_paths: + source_frame = read_static_image(source_path) + face = get_one_face(source_frame) + + assert isinstance(face, Face) + + +def test_get_one_face_with_yunet() -> None: + facefusion.globals.face_detector_model = 'yunet' + facefusion.globals.face_detector_size = '640x640' + + pre_check() + source_paths =\ + [ + '.assets/examples/source.jpg', + '.assets/examples/source-80crop.jpg', + '.assets/examples/source-70crop.jpg', + '.assets/examples/source-60crop.jpg' + ] + for source_path in source_paths: + source_frame = read_static_image(source_path) + face = get_one_face(source_frame) + + assert isinstance(face, Face) diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py new file mode 100644 index 0000000000000000000000000000000000000000..5c4bd2d0d7141492495d9d2bf370aa9a5f44078b --- /dev/null +++ b/tests/test_ffmpeg.py @@ -0,0 +1,113 @@ +import glob +import subprocess +import pytest + +import facefusion.globals +from facefusion import process_manager +from facefusion.filesystem import get_temp_directory_path, create_temp, clear_temp +from facefusion.download import conditional_download +from facefusion.ffmpeg import extract_frames, read_audio_buffer + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + process_manager.start() + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.mp3', '.assets/examples/source.wav' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=25', '.assets/examples/target-240p-25fps.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=30', '.assets/examples/target-240p-30fps.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=60', 
'.assets/examples/target-240p-60fps.mp4' ]) + + +@pytest.fixture(scope = 'function', autouse = True) +def before_each() -> None: + facefusion.globals.trim_frame_start = None + facefusion.globals.trim_frame_end = None + facefusion.globals.temp_frame_format = 'jpg' + + +def test_extract_frames() -> None: + target_paths =\ + [ + '.assets/examples/target-240p-25fps.mp4', + '.assets/examples/target-240p-30fps.mp4', + '.assets/examples/target-240p-60fps.mp4' + ] + + for target_path in target_paths: + temp_directory_path = get_temp_directory_path(target_path) + create_temp(target_path) + + assert extract_frames(target_path, '452x240', 30.0) is True + assert len(glob.glob1(temp_directory_path, '*.jpg')) == 324 + + clear_temp(target_path) + + +def test_extract_frames_with_trim_start() -> None: + facefusion.globals.trim_frame_start = 224 + data_provider =\ + [ + ('.assets/examples/target-240p-25fps.mp4', 55), + ('.assets/examples/target-240p-30fps.mp4', 100), + ('.assets/examples/target-240p-60fps.mp4', 212) + ] + + for target_path, frame_total in data_provider: + temp_directory_path = get_temp_directory_path(target_path) + create_temp(target_path) + + assert extract_frames(target_path, '452x240', 30.0) is True + assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total + + clear_temp(target_path) + + +def test_extract_frames_with_trim_start_and_trim_end() -> None: + facefusion.globals.trim_frame_start = 124 + facefusion.globals.trim_frame_end = 224 + data_provider =\ + [ + ('.assets/examples/target-240p-25fps.mp4', 120), + ('.assets/examples/target-240p-30fps.mp4', 100), + ('.assets/examples/target-240p-60fps.mp4', 50) + ] + + for target_path, frame_total in data_provider: + temp_directory_path = get_temp_directory_path(target_path) + create_temp(target_path) + + assert extract_frames(target_path, '452x240', 30.0) is True + assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total + + clear_temp(target_path) + + +def test_extract_frames_with_trim_end() -> None: + facefusion.globals.trim_frame_end = 100 + data_provider =\ + [ + ('.assets/examples/target-240p-25fps.mp4', 120), + ('.assets/examples/target-240p-30fps.mp4', 100), + ('.assets/examples/target-240p-60fps.mp4', 50) + ] + + for target_path, frame_total in data_provider: + temp_directory_path = get_temp_directory_path(target_path) + create_temp(target_path) + + assert extract_frames(target_path, '426x240', 30.0) is True + assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total + + clear_temp(target_path) + + +def test_read_audio_buffer() -> None: + assert isinstance(read_audio_buffer('.assets/examples/source.mp3', 1, 1), bytes) + assert isinstance(read_audio_buffer('.assets/examples/source.wav', 1, 1), bytes) + assert read_audio_buffer('.assets/examples/invalid.mp3', 1, 1) is None diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py new file mode 100644 index 0000000000000000000000000000000000000000..d149cba2d8ef86d166879124b63bd944338cdef2 --- /dev/null +++ b/tests/test_filesystem.py @@ -0,0 +1,76 @@ +import pytest + +from facefusion.download import conditional_download +from facefusion.filesystem import is_file, is_directory, is_audio, has_audio, is_image, has_image, is_video, filter_audio_paths, filter_image_paths, list_directory + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 
'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + ]) + + +def test_is_file() -> None: + assert is_file('.assets/examples/source.jpg') is True + assert is_file('.assets/examples') is False + assert is_file('invalid') is False + + +def test_is_directory() -> None: + assert is_directory('.assets/examples') is True + assert is_directory('.assets/examples/source.jpg') is False + assert is_directory('invalid') is False + + +def test_is_audio() -> None: + assert is_audio('.assets/examples/source.mp3') is True + assert is_audio('.assets/examples/source.jpg') is False + assert is_audio('invalid') is False + + +def test_has_audio() -> None: + assert has_audio([ '.assets/examples/source.mp3' ]) is True + assert has_audio([ '.assets/examples/source.mp3', '.assets/examples/source.jpg' ]) is True + assert has_audio([ '.assets/examples/source.jpg', '.assets/examples/source.jpg' ]) is False + assert has_audio([ 'invalid' ]) is False + + +def test_is_image() -> None: + assert is_image('.assets/examples/source.jpg') is True + assert is_image('.assets/examples/target-240p.mp4') is False + assert is_image('invalid') is False + + +def test_has_image() -> None: + assert has_image([ '.assets/examples/source.jpg' ]) is True + assert has_image([ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) is True + assert has_image([ '.assets/examples/source.mp3', '.assets/examples/source.mp3' ]) is False + assert has_image([ 'invalid' ]) is False + + +def test_is_video() -> None: + assert is_video('.assets/examples/target-240p.mp4') is True + assert is_video('.assets/examples/source.jpg') is False + assert is_video('invalid') is False + + +def test_filter_audio_paths() -> None: + assert filter_audio_paths([ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) == [ '.assets/examples/source.mp3' ] + assert filter_audio_paths([ '.assets/examples/source.jpg', '.assets/examples/source.jpg' ]) == [] + assert filter_audio_paths([ 'invalid' ]) == [] + + +def test_filter_image_paths() -> None: + assert filter_image_paths([ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) == [ '.assets/examples/source.jpg' ] + assert filter_image_paths([ '.assets/examples/source.mp3', '.assets/examples/source.mp3' ]) == [] + assert filter_audio_paths([ 'invalid' ]) == [] + + +def test_list_directory() -> None: + assert list_directory('.assets/examples') + assert list_directory('.assets/examples/source.jpg') is None + assert list_directory('invalid') is None diff --git a/tests/test_memory.py b/tests/test_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..8ab07f5b0a6d6ce9e1cc50886bd7107a826ee983 --- /dev/null +++ b/tests/test_memory.py @@ -0,0 +1,9 @@ +import platform + +from facefusion.memory import limit_system_memory + + +def test_limit_system_memory() -> None: + assert limit_system_memory(4) is True + if platform.system().lower() == 'darwin' or platform.system().lower() == 'linux': + assert limit_system_memory(1024) is False diff --git a/tests/test_normalizer.py b/tests/test_normalizer.py new file mode 100644 index 0000000000000000000000000000000000000000..28b79149cf29a18431b09f5d7d8de543c160a42a --- /dev/null +++ b/tests/test_normalizer.py @@ -0,0 +1,31 @@ +import platform + +from facefusion.normalizer import normalize_output_path, normalize_padding, normalize_fps + + +def test_normalize_output_path() -> None: + if platform.system().lower() == 
'linux' or platform.system().lower() == 'darwin': + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples/target-240p.mp4') == '.assets/examples/target-240p.mp4' + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples').startswith('.assets/examples/target-240p') + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples').endswith('.mp4') + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples/output.mp4') == '.assets/examples/output.mp4' + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples/invalid') is None + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/invalid/output.mp4') is None + assert normalize_output_path('.assets/examples/target-240p.mp4', 'invalid') is None + assert normalize_output_path('.assets/examples/target-240p.mp4', None) is None + assert normalize_output_path(None, '.assets/examples/output.mp4') is None + + +def test_normalize_padding() -> None: + assert normalize_padding([ 0, 0, 0, 0 ]) == (0, 0, 0, 0) + assert normalize_padding([ 1 ]) == (1, 1, 1, 1) + assert normalize_padding([ 1, 2 ]) == (1, 2, 1, 2) + assert normalize_padding([ 1, 2, 3 ]) == (1, 2, 3, 2) + assert normalize_padding(None) is None + + +def test_normalize_fps() -> None: + assert normalize_fps(0.0) == 1.0 + assert normalize_fps(25.0) == 25.0 + assert normalize_fps(61.0) == 60.0 + assert normalize_fps(None) is None diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..1fbe74bb82b2eb82fb90958013ba033ef05f9185 --- /dev/null +++ b/tests/test_process_manager.py @@ -0,0 +1,22 @@ +from facefusion.process_manager import set_process_state, is_processing, is_stopping, is_pending, start, stop, end + + +def test_start() -> None: + set_process_state('pending') + start() + + assert is_processing() + + +def test_stop() -> None: + set_process_state('processing') + stop() + + assert is_stopping() + + +def test_end() -> None: + set_process_state('processing') + end() + + assert is_pending() diff --git a/tests/test_vision.py b/tests/test_vision.py new file mode 100644 index 0000000000000000000000000000000000000000..6cf48647a5c6f51a63ae12489c5b71217e0b6a57 --- /dev/null +++ b/tests/test_vision.py @@ -0,0 +1,109 @@ +import subprocess +import pytest + +from facefusion.download import conditional_download +from facefusion.vision import detect_image_resolution, restrict_image_resolution, create_image_resolutions, get_video_frame, count_video_frame_total, detect_video_fps, restrict_video_fps, detect_video_resolution, restrict_video_resolution, create_video_resolutions, normalize_resolution, pack_resolution, unpack_resolution + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1080p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '.assets/examples/target-240p.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-1080p.mp4', '-vframes', '1', '.assets/examples/target-1080p.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', 
'-vf', 'transpose=0', '.assets/examples/target-240p-90deg.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-1080p.mp4', '-vframes', '1', '-vf', 'transpose=0', '.assets/examples/target-1080p-90deg.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=25', '.assets/examples/target-240p-25fps.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=30', '.assets/examples/target-240p-30fps.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=60', '.assets/examples/target-240p-60fps.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'transpose=0', '.assets/examples/target-240p-90deg.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-1080p.mp4', '-vf', 'transpose=0', '.assets/examples/target-1080p-90deg.mp4' ]) + + +def test_detect_image_resolution() -> None: + assert detect_image_resolution('.assets/examples/target-240p.jpg') == (426, 226) + assert detect_image_resolution('.assets/examples/target-240p-90deg.jpg') == (226, 426) + assert detect_image_resolution('.assets/examples/target-1080p.jpg') == (2048, 1080) + assert detect_image_resolution('.assets/examples/target-1080p-90deg.jpg') == (1080, 2048) + assert detect_image_resolution('invalid') is None + + +def test_restrict_image_resolution() -> None: + assert restrict_image_resolution('.assets/examples/target-1080p.jpg', (426, 226)) == (426, 226) + assert restrict_image_resolution('.assets/examples/target-1080p.jpg', (2048, 1080)) == (2048, 1080) + assert restrict_image_resolution('.assets/examples/target-1080p.jpg', (4096, 2160)) == (2048, 1080) + + +def test_create_image_resolutions() -> None: + assert create_image_resolutions((426, 226)) == [ '106x56', '212x112', '320x170', '426x226', '640x340', '852x452', '1064x564', '1278x678', '1492x792', '1704x904' ] + assert create_image_resolutions((226, 426)) == [ '56x106', '112x212', '170x320', '226x426', '340x640', '452x852', '564x1064', '678x1278', '792x1492', '904x1704' ] + assert create_image_resolutions((2048, 1080)) == [ '512x270', '1024x540', '1536x810', '2048x1080', '3072x1620', '4096x2160', '5120x2700', '6144x3240', '7168x3780', '8192x4320' ] + assert create_image_resolutions((1080, 2048)) == [ '270x512', '540x1024', '810x1536', '1080x2048', '1620x3072', '2160x4096', '2700x5120', '3240x6144', '3780x7168', '4320x8192' ] + assert create_image_resolutions(None) == [] + + +def test_get_video_frame() -> None: + assert get_video_frame('.assets/examples/target-240p-25fps.mp4') is not None + assert get_video_frame('invalid') is None + + +def test_count_video_frame_total() -> None: + assert count_video_frame_total('.assets/examples/target-240p-25fps.mp4') == 270 + assert count_video_frame_total('.assets/examples/target-240p-30fps.mp4') == 324 + assert count_video_frame_total('.assets/examples/target-240p-60fps.mp4') == 648 + assert count_video_frame_total('invalid') == 0 + + +def test_detect_video_fps() -> None: + assert detect_video_fps('.assets/examples/target-240p-25fps.mp4') == 25.0 + assert detect_video_fps('.assets/examples/target-240p-30fps.mp4') == 30.0 + assert detect_video_fps('.assets/examples/target-240p-60fps.mp4') == 60.0 + assert detect_video_fps('invalid') is None + + +def test_restrict_video_fps() -> None: + assert restrict_video_fps('.assets/examples/target-1080p.mp4', 20.0) == 20.0 + assert restrict_video_fps('.assets/examples/target-1080p.mp4', 25.0) == 25.0 + assert 
restrict_video_fps('.assets/examples/target-1080p.mp4', 60.0) == 25.0 + + +def test_detect_video_resolution() -> None: + assert detect_video_resolution('.assets/examples/target-240p.mp4') == (426, 226) + assert detect_video_resolution('.assets/examples/target-240p-90deg.mp4') == (226, 426) + assert detect_video_resolution('.assets/examples/target-1080p.mp4') == (2048, 1080) + assert detect_video_resolution('.assets/examples/target-1080p-90deg.mp4') == (1080, 2048) + assert detect_video_resolution('invalid') is None + + +def test_restrict_video_resolution() -> None: + assert restrict_video_resolution('.assets/examples/target-1080p.mp4', (426, 226)) == (426, 226) + assert restrict_video_resolution('.assets/examples/target-1080p.mp4', (2048, 1080)) == (2048, 1080) + assert restrict_video_resolution('.assets/examples/target-1080p.mp4', (4096, 2160)) == (2048, 1080) + + +def test_create_video_resolutions() -> None: + assert create_video_resolutions((426, 226)) == [ '426x226', '452x240', '678x360', '904x480', '1018x540', '1358x720', '2036x1080', '2714x1440', '4072x2160', '8144x4320' ] + assert create_video_resolutions((226, 426)) == [ '226x426', '240x452', '360x678', '480x904', '540x1018', '720x1358', '1080x2036', '1440x2714', '2160x4072', '4320x8144' ] + assert create_video_resolutions((2048, 1080)) == [ '456x240', '682x360', '910x480', '1024x540', '1366x720', '2048x1080', '2730x1440', '4096x2160', '8192x4320' ] + assert create_video_resolutions((1080, 2048)) == [ '240x456', '360x682', '480x910', '540x1024', '720x1366', '1080x2048', '1440x2730', '2160x4096', '4320x8192' ] + assert create_video_resolutions(None) == [] + + +def test_normalize_resolution() -> None: + assert normalize_resolution((2.5, 2.5)) == (2, 2) + assert normalize_resolution((3.0, 3.0)) == (4, 4) + assert normalize_resolution((6.5, 6.5)) == (6, 6) + + +def test_pack_resolution() -> None: + assert pack_resolution((1, 1)) == '0x0' + assert pack_resolution((2, 2)) == '2x2' + + +def test_unpack_resolution() -> None: + assert unpack_resolution('0x0') == (0, 0) + assert unpack_resolution('2x2') == (2, 2) diff --git a/tests/test_wording.py b/tests/test_wording.py new file mode 100644 index 0000000000000000000000000000000000000000..1deaa773616c036aa6a74dd3a7c5209824e526ea --- /dev/null +++ b/tests/test_wording.py @@ -0,0 +1,7 @@ +from facefusion import wording + + +def test_get() -> None: + assert wording.get('python_not_supported') + assert wording.get('help.source') + assert wording.get('invalid') is None
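A note on the tiling helpers: create_tile_frames and merge_tile_frames in facefusion/vision.py are the least self-documenting additions in this diff. The former pads a frame and cuts it into overlapping square tiles, the latter crops the overlap away and stitches processed tiles back together. The sketch below is not part of the diff; it only illustrates how the two functions round-trip a frame, together with the resolution helpers, using an arbitrary size triple of (128, 16, 8) and assuming the packages from requirements.txt are installed.

```
# illustrative only: the size triple (tile size, outer padding, per-tile overlap) is an example value
import numpy

from facefusion.vision import create_tile_frames, merge_tile_frames, normalize_resolution, pack_resolution, unpack_resolution

# cut a frame into overlapping 128x128 tiles, then stitch them back unchanged
vision_frame = numpy.random.randint(0, 256, (226, 426, 3), dtype = numpy.uint8)
size = (128, 16, 8)
tile_vision_frames, pad_width, pad_height = create_tile_frames(vision_frame, size)
merge_vision_frame = merge_tile_frames(tile_vision_frames, 426, 226, pad_width, pad_height, size)
assert numpy.array_equal(merge_vision_frame, vision_frame)

# the resolution helpers snap to even pixel counts and survive a pack/unpack round trip
resolution = normalize_resolution((853.0, 480.0))
assert resolution == (852, 480)
assert unpack_resolution(pack_resolution(resolution)) == resolution
```

The tests added in tests/test_vision.py cover the resolution helpers but not the tile functions, so a round-trip check of this kind is an obvious candidate for a follow-up test.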