Spaces: Runtime error

Haoxin Chen committed
Commit · 4949f04 · 1 Parent(s): d98ccdc

update videocrafter2

Files changed:
- app.py (+2 −37)
- configs/inference_t2v_512_v2.0.yaml (+77 −0)
- t2v_test.py (+8 −8)
app.py
CHANGED
@@ -3,7 +3,6 @@ import sys
 import gradio as gr
 # from demo_test import Text2Video, Image2Video
 from t2v_test import Text2Video
-from i2v_test import Image2Video
 sys.path.insert(1, os.path.join(sys.path[0], 'lvdm'))
 
 t2v_examples = [
@@ -15,18 +14,14 @@ t2v_examples = [
     ['Robot dancing in times square',25,12,1,16],
 ]
 
-i2v_examples = [
-    ['prompts/i2v_prompts/horse.png', 'horses are walking on the grassland', 50, 12, 1, 16]
-]
 
 def videocrafter_demo(result_dir='./tmp/'):
     text2video = Text2Video(result_dir)
-    image2video = Image2Video(result_dir)
     with gr.Blocks(analytics_enabled=False) as videocrafter_iface:
-        gr.Markdown("<div align='center'> <h2>
+        gr.Markdown("<div align='center'> <h2> VideoCrafter2: Overcoming Data Limitations for High-Quality Video Diffusion Models </span> </h2> \
                     <a style='font-size:18px;color: #000000' href='https://github.com/AILab-CVC/VideoCrafter'> Github </div>")
 
-        gr.Markdown("<b> You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href='https://huggingface.co/spaces/VideoCrafter/VideoCrafter?duplicate=true'> <img src='https://bit.ly/3gLdBN6' alt='Duplicate Space'></a> </b>")
+        # gr.Markdown("<b> You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href='https://huggingface.co/spaces/VideoCrafter/VideoCrafter?duplicate=true'> <img src='https://bit.ly/3gLdBN6' alt='Duplicate Space'></a> </b>")
         #######t2v#######
         with gr.Tab(label="Text2Video"):
             with gr.Column():
@@ -54,36 +49,6 @@ def videocrafter_demo(result_dir='./tmp/'):
                           inputs=[input_text,steps,cfg_scale,eta,fps],
                           outputs=[output_video_1],
                           )
-        #######image2video######
-        with gr.Tab(label='Image2Video'):
-            with gr.Column():
-                with gr.Row():
-                    with gr.Column():
-                        with gr.Row():
-                            i2v_input_image = gr.Image(label="Input Image").style(width=256)
-                        with gr.Row():
-                            i2v_input_text = gr.Text(label='Prompts')
-                        with gr.Row():
-                            i2v_eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='ETA', value=1.0, elem_id="i2v_eta")
-                            i2v_cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=12.0, elem_id="i2v_cfg_scale")
-                        with gr.Row():
-                            i2v_steps = gr.Slider(minimum=1, maximum=60, step=1, elem_id="i2v_steps", label="Sampling steps", value=50)
-                            i2v_fps = gr.Slider(minimum=4, maximum=32, step=1, elem_id="i2v_fps", label="Generative fps", value=16)
-                        i2v_end_btn = gr.Button("Send")
-                    with gr.Tab(label='Result'):
-                        with gr.Row():
-                            i2v_output_video = gr.Video(label="Generated Video").style(width=512)
-
-        gr.Examples(examples=i2v_examples,
-                    inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_fps],
-                    outputs=[i2v_output_video],
-                    fn = image2video.get_image,
-                    cache_examples=os.getenv('SYSTEM') == 'spaces',
-                    )
-        i2v_end_btn.click(inputs=[i2v_input_image, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_fps],
-                          outputs=[i2v_output_video],
-                          fn = image2video.get_image
-                          )
 
     return videocrafter_iface
 
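With the Image2Video tab removed, app.py now builds a single-tab Gradio Blocks app around Text2Video. For context, a minimal launcher sketch; the queue and server settings below are illustrative assumptions, not part of this commit, and the Space's own entry point may differ:

# Hedged sketch: launching the trimmed text-to-video demo.
# Assumes gradio 3.x and that app.py exposes videocrafter_demo();
# the queue size and bind address are assumptions, not from this diff.
from app import videocrafter_demo

iface = videocrafter_demo(result_dir='./tmp/')
iface.queue(max_size=12)                # serialize GPU requests (assumption)
iface.launch(server_name='0.0.0.0')     # bind for Spaces-style hosting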
configs/inference_t2v_512_v2.0.yaml
ADDED
@@ -0,0 +1,77 @@
+model:
+  target: lvdm.models.ddpm3d.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.012
+    num_timesteps_cond: 1
+    timesteps: 1000
+    first_stage_key: video
+    cond_stage_key: caption
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    image_size:
+    - 40
+    - 64
+    channels: 4
+    scale_by_std: false
+    scale_factor: 0.18215
+    use_ema: false
+    uncond_type: empty_seq
+    use_scale: true
+    scale_b: 0.7
+    unet_config:
+      target: lvdm.modules.networks.openaimodel3d.UNetModel
+      params:
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions:
+        - 4
+        - 2
+        - 1
+        num_res_blocks: 2
+        channel_mult:
+        - 1
+        - 2
+        - 4
+        - 4
+        num_head_channels: 64
+        transformer_depth: 1
+        context_dim: 1024
+        use_linear: true
+        use_checkpoint: true
+        temporal_conv: true
+        temporal_attention: true
+        temporal_selfatt_only: true
+        use_relative_position: false
+        use_causal_attention: false
+        temporal_length: 16
+        addition_attention: true
+        fps_cond: true
+    first_stage_config:
+      target: lvdm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 512
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+    cond_stage_config:
+      target: lvdm.modules.encoders.condition.FrozenOpenCLIPEmbedder
+      params:
+        freeze: true
+        layer: penultimate
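The new config follows the target/params convention of latent-diffusion-style codebases: each block names a dotted class path plus its constructor kwargs, and t2v_test.py pops the model block before building the model. A minimal sketch of how such a block is typically resolved; this instantiate_from_config is a generic reimplementation written for illustration, not necessarily the exact helper the lvdm package ships:

import importlib
from omegaconf import OmegaConf

def instantiate_from_config(cfg):
    # 'target' is a dotted class path; 'params' holds its constructor kwargs.
    module_name, cls_name = cfg["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), cls_name)
    return cls(**cfg.get("params", {}))

config = OmegaConf.load('configs/inference_t2v_512_v2.0.yaml')
model_config = config.pop("model", OmegaConf.create())  # as in t2v_test.py
# model = instantiate_from_config(model_config)  # requires the lvdm package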
t2v_test.py
CHANGED
@@ -12,8 +12,8 @@ class Text2Video():
         self.result_dir = result_dir
         if not os.path.exists(self.result_dir):
             os.mkdir(self.result_dir)
-        ckpt_path='checkpoints/
-        config_file='configs/
+        ckpt_path='checkpoints/base_512_v2/model.ckpt'
+        config_file='configs/inference_t2v_512_v2.0.yaml'
         config = OmegaConf.load(config_file)
         model_config = config.pop("model", OmegaConf.create())
         model_config['params']['unet_config']['params']['use_checkpoint']=False
@@ -40,7 +40,7 @@ class Text2Video():
         batch_size=1
         channels = model.model.diffusion_model.in_channels
         frames = model.temporal_length
-        h, w =
+        h, w = 320 // 8, 512 // 8
         noise_shape = [batch_size, channels, frames, h, w]
 
         #prompts = batch_size * [""]
@@ -61,15 +61,15 @@ class Text2Video():
         return os.path.join(self.result_dir, f"{prompt_str}.mp4")
 
     def download_model(self):
-        REPO_ID = 'VideoCrafter/
+        REPO_ID = 'VideoCrafter/VideoCrafter2'
         filename_list = ['model.ckpt']
-        if not os.path.exists('./checkpoints/
-            os.makedirs('./checkpoints/
+        if not os.path.exists('./checkpoints/base_512_v2/'):
+            os.makedirs('./checkpoints/base_512_v2/')
         for filename in filename_list:
-            local_file = os.path.join('./checkpoints/
+            local_file = os.path.join('./checkpoints/base_512_v2/', filename)
 
             if not os.path.exists(local_file):
-                hf_hub_download(repo_id=REPO_ID, filename=filename, local_dir='./checkpoints/
+                hf_hub_download(repo_id=REPO_ID, filename=filename, local_dir='./checkpoints/base_512_v2/', local_dir_use_symlinks=False)
 
 
 if __name__ == '__main__':
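The hard-coded h, w = 320 // 8, 512 // 8 encodes the autoencoder's 8x spatial downsampling, so the latent grid matches image_size: [40, 64] in the new config. A quick sanity check of the noise-shape arithmetic, using only values visible in this diff and the added yaml:

# Latent noise shape for the 512x320 VideoCrafter2 base model.
# channels and frames mirror the config (in_channels: 4, temporal_length: 16).
batch_size = 1
channels = 4
frames = 16
h, w = 320 // 8, 512 // 8        # VAE downsamples each spatial dim by 8
noise_shape = [batch_size, channels, frames, h, w]
assert (h, w) == (40, 64)        # agrees with image_size in the yaml
print(noise_shape)               # [1, 4, 16, 40, 64]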