diff --git "a/FateZero/colab_fatezero.ipynb" "b/FateZero/colab_fatezero.ipynb"
new file mode 100644
--- /dev/null
+++ "b/FateZero/colab_fatezero.ipynb"
@@ -0,0 +1,528 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fZ_xQvU70UQc"
+ },
+ "source": [
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ChenyangQiQi/FateZero/blob/main/colab_fatezero.ipynb)\n",
+ "\n",
+ "# FateZero: Fusing Attentions for Zero-shot Text-based Video Editing\n",
+ "\n",
+ "[Chenyang Qi](https://chenyangqiqi.github.io/), [Xiaodong Cun](http://vinthony.github.io/), [Yong Zhang](https://yzhang2016.github.io), [Chenyang Lei](https://chenyanglei.github.io/), [Xintao Wang](https://xinntao.github.io/), [Ying Shan](https://scholar.google.com/citations?hl=zh-CN&user=4oXBp9UAAAAJ), and [Qifeng Chen](https://cqf.io)\n",
+ "\n",
+ "\n",
+ "[![Project Website](https://img.shields.io/badge/Project-Website-orange)](https://fate-zero-edit.github.io/)\n",
+ "[![arXiv](https://img.shields.io/badge/arXiv-2303.09535-b31b1b.svg)](https://arxiv.org/abs/2303.09535)\n",
+ "[![GitHub](https://img.shields.io/github/stars/ChenyangQiQi/FateZero?style=social)](https://github.com/ChenyangQiQi/FateZero)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "XU7NuMAA2drw",
+ "outputId": "82c4a90d-0ed6-4ad5-c74d-0a0ed3d98bbe"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Tesla T4, 15360 MiB, 15101 MiB\n"
+ ]
+ }
+ ],
+ "source": [
+ "#@markdown Check type of GPU and VRAM available.\n",
+ "!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "D1PRgre3Gt5U",
+ "outputId": "ac1db329-a373-4c82-9b0d-77f4e5cb7140"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Cloning into '/content/FateZero'...\n",
+ "remote: Enumerating objects: 332, done.\u001b[K\n",
+ "remote: Counting objects: 100% (53/53), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (7/7), done.\u001b[K\n",
+ "remote: Total 332 (delta 50), reused 47 (delta 46), pack-reused 279\u001b[K\n",
+ "Receiving objects: 100% (332/332), 34.21 MiB | 14.26 MiB/s, done.\n",
+ "Resolving deltas: 100% (157/157), done.\n",
+ "/content/FateZero\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.3/63.3 MB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m145.0/145.0 KB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Building wheel for lit (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m524.9/524.9 KB\u001b[0m \u001b[31m35.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m74.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 MB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m96.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.8/212.8 KB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 KB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.6/41.6 KB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 KB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.8/15.8 MB\u001b[0m \u001b[31m88.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m26.9/26.9 MB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.9/50.9 MB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.8/199.8 KB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m105.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 KB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 KB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.5/84.5 KB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.5/106.5 KB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 KB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 KB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.8/57.8 KB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.1/57.1 KB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.5/50.5 KB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.2/114.2 KB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.8/158.8 KB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m264.6/264.6 KB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 KB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.6/69.6 KB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 KB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Building wheel for ffmpy (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ],
+ "source": [
+ "#@title Install requirements\n",
+ "\n",
+ "!git clone https://github.com/ChenyangQiQi/FateZero /content/FateZero\n",
+ "%cd /content/FateZero\n",
+ "# %pip install -r requirements.txt\n",
+ "%pip install -q -U --pre triton\n",
+ "%pip install -q diffusers[torch]==0.11.1 transformers==4.26.0 bitsandbytes==0.35.4 \\\n",
+ "decord accelerate omegaconf einops ftfy gradio imageio-ffmpeg xformers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "m6I6kZNG3Inb",
+ "outputId": "f3bcb6eb-a79c-4810-d575-e926c8e7564f"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Updated git hooks.\n",
+ "Git LFS initialized.\n",
+ "Cloning into 'ckpt/CompVis/stable-diffusion-v1-4'...\n",
+ "remote: Enumerating objects: 738, done.\u001b[K\n",
+ "remote: Counting objects: 100% (12/12), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (12/12), done.\u001b[K\n",
+ "remote: Total 738 (delta 3), reused 1 (delta 0), pack-reused 726\u001b[K\n",
+ "Receiving objects: 100% (738/738), 682.52 KiB | 954.00 KiB/s, done.\n",
+ "Resolving deltas: 100% (123/123), done.\n",
+ "Filtering content: 100% (8/8), 10.20 GiB | 63.59 MiB/s, done.\n",
+ "[*] MODEL_NAME=./ckpt/CompVis/stable-diffusion-v1-4\n"
+ ]
+ }
+ ],
+ "source": [
+ "#@title Download pretrained model\n",
+ "\n",
+ "#@markdown Name/Path of the initial model.\n",
+ "MODEL_NAME = \"CompVis/stable-diffusion-v1-4\" #@param {type:\"string\"}\n",
+ "\n",
+ "#@markdown If model should be download from a remote repo. Untick it if the model is loaded from a local path.\n",
+ "download_pretrained_model = True #@param {type:\"boolean\"}\n",
+ "if download_pretrained_model:\n",
+ " !git lfs install\n",
+ " !git clone https://huggingface.co./$MODEL_NAME ckpt/$MODEL_NAME\n",
+ " MODEL_NAME = f\"./ckpt/{MODEL_NAME}\"\n",
+ "print(f\"[*] MODEL_NAME={MODEL_NAME}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "qn5ILIyDJIcX"
+ },
+ "source": [
+ "# **Usage**\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "i4L2yDXGflaC"
+ },
+ "source": [
+ "## FateZero Edit with low resource cost\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "fXZs1veYIMMw",
+ "outputId": "c665eaba-ef12-498e-d173-6432e977fc07"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "save new configue to config/car-turn.yaml\n"
+ ]
+ }
+ ],
+ "source": [
+ "#@markdown Edit config\n",
+ "\n",
+ "#@markdown More details of the configuration will be given soon.\n",
+ "\n",
+ "from omegaconf import OmegaConf\n",
+ "\n",
+ "VIDEO_FILE = 'data/car-turn' #@param {type:\"string\"}\n",
+ "\n",
+ "VIDEO_ID = VIDEO_FILE.split('/')[-1]\n",
+ "\n",
+ "RESULT_DIR = 'result/'+VIDEO_ID\n",
+ "\n",
+ "CONFIG_NAME = \"config/\"+VIDEO_ID+\".yaml\" \n",
+ "\n",
+ "source_prompt = \"a silver jeep driving down a curvy road in the countryside\" #@param {type:\"string\"}\n",
+ "edit_prompt = \"watercolor painting of a silver jeep driving down a curvy road in the countryside\" #@param {type:\"string\"}\n",
+ "EMPHYSIS_WORD = \"watercolor\" #@param {type:\"string\"}\n",
+ "EMPHYSIS_VALUE = 10 #@param {type:\"number\"}\n",
+ "video_length = 8 #@param {type:\"number\"}\n",
+ "INVERSION_STEP = 8 #@param {type:\"number\"}\n",
+ "REPLACE_STRENGTH = 0.8 #@param {type:\"slider\", min:0, max:1, step:0.1}\n",
+ "STORE_ATTENTION_ON_disk = False #@param {type:\"boolean\"}\n",
+ "width = 512 \n",
+ "height = 512 \n",
+ "\n",
+ "config = {\n",
+ " \"pretrained_model_path\": MODEL_NAME,\n",
+ " \"logdir\": RESULT_DIR,\n",
+ " \"train_dataset\": {\n",
+ " \"path\": VIDEO_FILE,\n",
+ " \"prompt\": source_prompt,\n",
+ " \"n_sample_frame\": video_length,\n",
+ " \"sampling_rate\": 1,\n",
+ " \"stride\": 80,\n",
+ " \"offset\": \n",
+ " {\n",
+ " \"left\": 0,\n",
+ " \"right\": 0,\n",
+ " \"top\": 0,\n",
+ " \"bottom\": 0,\n",
+ " }\n",
+ " },\n",
+ " \"validation_sample_logger_config\":{\n",
+ " \"use_train_latents\": True,\n",
+ " \"use_inversion_attention\": True,\n",
+ " \"guidance_scale\": 7.5,\n",
+ " \"prompts\":[\n",
+ " source_prompt,\n",
+ " edit_prompt,\n",
+ " ],\n",
+ " \"p2p_config\":[ \n",
+ " {\n",
+ " \"cross_replace_steps\":{\n",
+ " \"default_\":0.8\n",
+ " },\n",
+ " \"self_replace_steps\": 0.8,\n",
+ " \"masked_self_attention\": True,\n",
+ " \"bend_th\": [2, 2],\n",
+ " \"is_replace_controller\": False \n",
+ " },\n",
+ " {\n",
+ " \"cross_replace_steps\":{\n",
+ " \"default_\":0.8\n",
+ " },\n",
+ " \"self_replace_steps\": 0.8,\n",
+ " \"eq_params\":{\n",
+ " \"words\":[EMPHYSIS_WORD],\n",
+ " \"values\": [EMPHYSIS_VALUE]\n",
+ " },\n",
+ " \"use_inversion_attention\": True,\n",
+ " \"is_replace_controller\": False \n",
+ " }]\n",
+ " ,\n",
+ " \"clip_length\": \"${..train_dataset.n_sample_frame}\",\n",
+ " \"sample_seeds\": [0],\n",
+ " \"num_inference_steps\": INVERSION_STEP,\n",
+ " \"prompt2prompt_edit\": True\n",
+ " },\n",
+ " \"disk_store\": STORE_ATTENTION_ON_disk,\n",
+ " \"model_config\":{\n",
+ " \"lora\": 160,\n",
+ " \"SparseCausalAttention_index\": ['mid'],\n",
+ " \"least_sc_channel\": 640\n",
+ " },\n",
+ " \"test_pipeline_config\":{\n",
+ " \"target\": \"video_diffusion.pipelines.p2pDDIMSpatioTemporalPipeline.p2pDDIMSpatioTemporalPipeline\",\n",
+ " \"num_inference_steps\": \"${..validation_sample_logger_config.num_inference_steps}\"\n",
+ " },\n",
+ " \"epsilon\": 1e-5,\n",
+ " \"train_steps\": 10,\n",
+ " \"seed\": 0,\n",
+ " \"learning_rate\": 1e-5,\n",
+ " \"train_temporal_conv\": False,\n",
+ " \"guidance_scale\": \"${validation_sample_logger_config.guidance_scale}\"\n",
+ "}\n",
+ "\n",
+ "OmegaConf.save(config, CONFIG_NAME)\n",
+ "print('save new config to ', CONFIG_NAME)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "jjcSXTp-u-Eg",
+ "outputId": "194d964e-08dc-4d3d-c0fd-7e56ed2eb187"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-03-22 09:04:20.819710: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-03-22 09:04:24.565385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
+ "2023-03-22 09:04:24.565750: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
+ "2023-03-22 09:04:24.565782: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
+ "The following values were not passed to `accelerate launch` and had defaults used instead:\n",
+ "\t`--num_processes` was set to a value of `1`\n",
+ "\t`--num_machines` was set to a value of `1`\n",
+ "\t`--mixed_precision` was set to a value of `'no'`\n",
+ "\t`--dynamo_backend` was set to a value of `'no'`\n",
+ "To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n",
+ "2023-03-22 09:04:31.342590: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
+ "2023-03-22 09:04:31.342704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
+ "2023-03-22 09:04:31.342734: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
+ "The config attributes {'scaling_factor': 0.18215} were passed to AutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.\n",
+ "use fp16\n",
+ "Number of attention layer registered 32\n",
+ " Invert clean image to noise latents by DDIM and Unet\n",
+ "100% 8/8 [00:25<00:00, 3.19s/it]\n",
+ "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (3328, 307) to (3328, 320) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
+ "Number of attention layer registered 32\n",
+ "Generating sample images: 0% 0/2 [00:00, ?it/s] len_source: {11}, len_target: {11}, equal_length: True\n",
+ "use refine controller\n",
+ "Number of attention layer registered 32\n",
+ "\n",
+ " 0% 0/8 [00:00, ?it/s]\u001b[A\n",
+ " 12% 1/8 [00:01<00:13, 1.87s/it]\u001b[A\n",
+ " 25% 2/8 [00:03<00:11, 1.94s/it]\u001b[A\n",
+ " 38% 3/8 [00:05<00:09, 1.95s/it]\u001b[A\n",
+ " 50% 4/8 [00:07<00:07, 1.95s/it]\u001b[A\n",
+ " 62% 5/8 [00:09<00:05, 1.95s/it]\u001b[A\n",
+ " 75% 6/8 [00:11<00:03, 1.94s/it]\u001b[A\n",
+ " 88% 7/8 [00:13<00:01, 1.91s/it]\u001b[A\n",
+ "100% 8/8 [00:15<00:00, 1.91s/it]\n",
+ "Number of attention layer registered 32\n",
+ "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (512, 536) to (512, 544) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
+ "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (3328, 307) to (3328, 320) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
+ "Generating sample images: 50% 1/2 [00:21<00:21, 21.02s/it] len_source: {11}, len_target: {14}, equal_length: False\n",
+ "use refine controller\n",
+ "Number of attention layer registered 32\n",
+ "\n",
+ " 0% 0/8 [00:00, ?it/s]\u001b[A\n",
+ " 12% 1/8 [00:01<00:13, 1.94s/it]\u001b[A\n",
+ " 25% 2/8 [00:03<00:11, 1.93s/it]\u001b[A\n",
+ " 38% 3/8 [00:05<00:09, 1.92s/it]\u001b[A\n",
+ " 50% 4/8 [00:07<00:07, 1.92s/it]\u001b[A\n",
+ " 62% 5/8 [00:09<00:05, 1.93s/it]\u001b[A\n",
+ " 75% 6/8 [00:11<00:03, 1.93s/it]\u001b[A\n",
+ " 88% 7/8 [00:13<00:01, 1.89s/it]\u001b[A\n",
+ "100% 8/8 [00:15<00:00, 1.90s/it]\n",
+ "Number of attention layer registered 32\n",
+ "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (512, 557) to (512, 560) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
+ "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (4096, 307) to (4096, 320) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
+ "Generating sample images: 100% 2/2 [00:41<00:00, 20.61s/it]\n",
+ "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (3328, 614) to (3328, 624) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n"
+ ]
+ }
+ ],
+ "source": [
+ "!accelerate launch test_fatezero.py --config=$CONFIG_NAME"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ToNG4fd_dTbF"
+ },
+ "source": [
+ "### Show the results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 592
+ },
+ "id": "jXgi8HM4c-DA",
+ "outputId": "552cf87d-8258-4502-8bad-b5a86e70f4a0"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "./result/car-turn_230322-090435/sample/step_0.mp4\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Display animation: ./result/car-turn_230322-090435/sample/step_0.mp4\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from IPython.display import HTML\n",
+ "from base64 import b64encode\n",
+ "import os, sys\n",
+ "import glob\n",
+ "\n",
+ "# get the last from results\n",
+ "mp4_name = sorted(glob.glob('./result/*/sample/step_0.mp4'))[-1]\n",
+ "\n",
+ "print(mp4_name)\n",
+ "mp4 = open('{}'.format(mp4_name),'rb').read()\n",
+ "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+ "\n",
+ "print('Display animation: {}'.format(mp4_name), file=sys.stderr)\n",
+ "display(HTML(\"\"\"\n",
+ " <video width=400 controls><source src=\"%s\" type=\"video/mp4\"></video>\n",
+ " \"\"\" % data_url))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "cBb3wTEXfhRo"
+ },
+ "source": [
+ "## Edit your video"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "mQR2cjDZV9tu"
+ },
+ "outputs": [],
+ "source": [
+ "#@markdown Upload your video(.mp4) by running this cell or skip this cell using the default data\n",
+ "\n",
+ "import os\n",
+ "from google.colab import files\n",
+ "import shutil\n",
+ "from IPython.display import HTML\n",
+ "from base64 import b64encode\n",
+ "\n",
+ "uploaded = files.upload()\n",
+ "for filename in uploaded.keys():\n",
+ " dst_path = os.path.join(\"data\", filename)\n",
+ " shutil.move(filename, dst_path)\n",
+ " \n",
+ "file_id = dst_path.replace('.mp4', '')\n",
+ "\n",
+ "! mkdir -p $file_id\n",
+ "! ffmpeg -hide_banner -loglevel error -i $dst_path -vf \"scale=512:512,fps=25\" $file_id/%05d.png\n",
+ "\n",
+ "mp4 = open('{}'.format(dst_path),'rb').read()\n",
+ "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+ "\n",
+ "display(HTML(\"\"\"\n",
+ " <video width=400 controls><source src=\"%s\" type=\"video/mp4\"></video>\n",
+ " \"\"\" % data_url))\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "provenance": []
+ },
+ "gpuClass": "standard",
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "2.7.18 (default, Jul 1 2022, 12:27:04) \n[GCC 9.4.0]"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}