Spaces:
Running
on
A10G
Running
on
A10G
bug fix
Browse files- .gitignore +4 -0
- app.py +282 -23
- app_.py +0 -319
- example_videos/exp_input_1.mp4 +0 -0
- example_videos/exp_input_2.mp4 +0 -0
- example_videos/exp_input_3.mp4 +0 -0
- example_videos/exp_input_4.mp4 +0 -0
- example_videos/exp_input_5.mp4 +0 -0
- example_videos/exp_output_1.mp4 +0 -0
- example_videos/exp_output_2.mp4 +0 -0
- example_videos/exp_output_3.mp4 +0 -0
- example_videos/exp_output_4.mp4 +0 -0
- example_videos/exp_output_5.mp4 +0 -0
- gradio_cached_examples/59/Output Example/994f36ecf77e57c9b298/exp_output_1.mp4 +0 -0
- gradio_cached_examples/59/log.csv +0 -2
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
results/*
|
2 |
+
pretrained_models/*
|
3 |
+
gradio_cached_examples/*
|
4 |
+
generated/*
|
app.py
CHANGED
@@ -1,38 +1,297 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
|
|
|
|
|
|
8 |
|
9 |
-
|
10 |
-
return x
|
11 |
|
|
|
|
|
|
|
12 |
|
13 |
-
|
|
|
14 |
|
15 |
-
|
16 |
-
txt_2 = gr.Textbox(label="Input 2")
|
17 |
-
txt_3 = gr.Textbox(value="", label="Output")
|
18 |
-
btn = gr.Button(value="Submit")
|
19 |
-
btn.click(combine, inputs=[txt, txt_2], outputs=[txt_3])
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
with gr.Row():
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
cache_examples=True,
|
35 |
-
)
|
36 |
|
37 |
if __name__ == "__main__":
|
38 |
-
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import cv2
|
3 |
import os
|
4 |
+
import torch
|
5 |
+
import argparse
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
import yaml
|
9 |
+
import datetime
|
10 |
+
sys.path.append(os.path.dirname(os.getcwd()))
|
11 |
+
from pipelines.sd_controlnet_rave import RAVE
|
12 |
+
from pipelines.sd_multicontrolnet_rave import RAVE_MultiControlNet
|
13 |
+
import shutil
|
14 |
+
import subprocess
|
15 |
+
import utils.constants as const
|
16 |
+
import utils.video_grid_utils as vgu
|
17 |
+
import warnings
|
18 |
+
warnings.filterwarnings("ignore")
|
19 |
+
import pprint
|
20 |
+
import glob
|
21 |
+
|
22 |
+
|
23 |
+
def init_device():
    """Pick the torch device used for inference.

    Returns:
        torch.device: the ``'cuda'`` device when ``torch.cuda.is_available()``
        is true, otherwise the ``'cpu'`` device.
    """
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')
|
27 |
+
|
28 |
+
def init_paths(input_ns):
    """Derive and create the directories a run needs, storing them on ``input_ns``.

    Reads ``save_folder``, ``video_name``, ``positive_prompts``,
    ``preprocess_name``, ``model_id``, ``grid_size`` and ``pad`` from
    ``input_ns`` and sets ``save_folder``, ``save_path``, ``hf_cn_path``,
    ``hf_path``, ``inverse_path`` and ``control_path`` on it. The control,
    inverse and save directories are created if they do not exist.

    Args:
        input_ns: namespace-like object carrying the run settings.

    Returns:
        The same ``input_ns`` object, with the path attributes filled in.
    """
    # Results are grouped per video, optionally below a caller-chosen folder.
    if input_ns.save_folder is None or input_ns.save_folder == '':
        input_ns.save_folder = input_ns.video_name
    else:
        input_ns.save_folder = os.path.join(input_ns.save_folder, input_ns.video_name)
    save_dir = os.path.join(const.OUTPUT_PATH, input_ns.save_folder)
    os.makedirs(save_dir, exist_ok=True)

    # Run folders are named '<prompt>-<5-digit counter>'. Only entries whose
    # last five characters parse as an integer take part in the numbering, so
    # a stray file in save_dir no longer aborts the run with ValueError.
    used_indices = []
    for entry in os.listdir(save_dir):
        try:
            used_indices.append(int(entry[-5:]))
        except ValueError:
            continue
    save_idx = max(used_indices) + 1 if used_indices else 0
    input_ns.save_path = os.path.join(save_dir, f'{input_ns.positive_prompts}-{str(save_idx).zfill(5)}')

    # A '-' in the preprocess name denotes several preprocessors, each of
    # which is looked up separately.
    if '-' in input_ns.preprocess_name:
        input_ns.hf_cn_path = [const.PREPROCESSOR_DICT[i] for i in input_ns.preprocess_name.split('-')]
    else:
        input_ns.hf_cn_path = const.PREPROCESSOR_DICT[input_ns.preprocess_name]
    input_ns.hf_path = "runwayml/stable-diffusion-v1-5"

    input_ns.inverse_path = os.path.join(const.GENERATED_DATA_PATH, 'inverses', input_ns.video_name, f'{input_ns.preprocess_name}_{input_ns.model_id}_{input_ns.grid_size}x{input_ns.grid_size}_{input_ns.pad}')
    input_ns.control_path = os.path.join(const.GENERATED_DATA_PATH, 'controls', input_ns.video_name, f'{input_ns.preprocess_name}_{input_ns.grid_size}x{input_ns.grid_size}_{input_ns.pad}')
    os.makedirs(input_ns.control_path, exist_ok=True)
    os.makedirs(input_ns.inverse_path, exist_ok=True)
    os.makedirs(input_ns.save_path, exist_ok=True)
    return input_ns
|
51 |
+
|
52 |
+
def install_civitai_model(model_id):
    """Return the local diffusers directory for a CivitAI model, installing it if needed.

    If ``<CWD>/CIVIT_AI/diffusers_models/<model_id>/`` already contains an
    entry, the first one is returned. Otherwise the checkpoint is downloaded
    with ``wget`` into ``<CWD>/CIVIT_AI/safetensors/<model_id>/``, converted
    with ``<CWD>/CIVIT_AI/convert.py``, and the download directory is removed.

    Args:
        model_id: CivitAI model identifier; it is used both in the download
            URL and as a directory name.

    Returns:
        str: path of the converted diffusers model directory.

    Raises:
        subprocess.CalledProcessError: if ``wget`` or the conversion script
            exits with a non-zero status.
        FileNotFoundError: if the download finished without producing a file.
    """
    model_id = str(model_id)
    civitai_root = os.path.join(const.CWD, 'CIVIT_AI')
    diffusers_path = os.path.join(civitai_root, 'diffusers_models', model_id)

    # Reuse a previously converted model when one is present.
    cached = glob.glob(os.path.join(diffusers_path, '*'))
    if cached:
        return cached[0]

    install_path = os.path.join(civitai_root, 'safetensors')
    install_path_model = os.path.join(install_path, model_id)
    convert_py_path = os.path.join(civitai_root, 'convert.py')
    os.makedirs(install_path, exist_ok=True)
    os.makedirs(diffusers_path, exist_ok=True)

    try:
        # Arguments are passed as a list (no string splitting), so paths and
        # file names containing whitespace stay intact.
        subprocess.run(
            [
                'wget',
                f'https://civitai.com/api/download/models/{model_id}',
                '--content-disposition',
                '--directory',
                install_path_model,
            ],
            check=True,
        )
        downloaded = glob.glob(os.path.join(install_path_model, '*'))
        if not downloaded:
            raise FileNotFoundError(
                f'No file was downloaded for CivitAI model {model_id} into {install_path_model}'
            )
        model_name = downloaded[0]
        model_name2 = os.path.basename(model_name).replace('.safetensors', '')
        diffusers_path_model_name = os.path.join(diffusers_path, model_name2)
        print(model_name)
        try:
            # sys.executable runs the converter with the interpreter that is
            # running this app instead of whatever 'python' is on PATH.
            subprocess.run(
                [
                    sys.executable,
                    convert_py_path,
                    '--checkpoint_path',
                    model_name,
                    '--dump_path',
                    diffusers_path_model_name,
                    '--from_safetensors',
                ],
                check=True,
            )
        except subprocess.CalledProcessError:
            # Drop a partial conversion so the cache check above does not
            # return it on the next call.
            shutil.rmtree(diffusers_path_model_name, ignore_errors=True)
            raise
    finally:
        # The raw download is only needed for the conversion step.
        shutil.rmtree(install_path, ignore_errors=True)
    return diffusers_path_model_name
|
71 |
+
|
72 |
+
def run(*args):
    """Edit one video with RAVE and return the paths of the saved GIFs.

    Callback wired to the 'Run All' button. The first twelve positional
    arguments are, in order: video path, preprocess name, ControlNet
    conditioning scale, ControlNet guidance end, ControlNet guidance start,
    grid size, sample size, pad, guidance scale, negative prompts, positive
    prompts and seed. Any further arguments are ignored.

    Besides the GIFs, a ``config.yaml`` with the run settings and timing is
    written into the run's save directory.

    Returns:
        tuple[str, str]: path of the edited GIF and path of the control GIF.

    Raises:
        ValueError: if fewer than twelve arguments are given or no input
            video was provided.
    """
    # Settings that are fixed here rather than passed in by the caller.
    batch_size = 4
    batch_size_vae = 1
    is_ddim_inversion = True
    is_shuffle = True
    num_inference_steps = 20
    num_inversion_step = 20
    cond_step_start = 0.0
    give_control_inversion = True
    model_id = 'SD 1.5'
    inversion_prompt = ''
    save_folder = ''

    (
        video_path,
        preprocess_name,
        controlnet_conditioning_scale,
        controlnet_guidance_end,
        controlnet_guidance_start,
        grid_size,
        sample_size,
        pad,
        guidance_scale,
        negative_prompts,
        positive_prompts,
        seed,
    ) = args[:12]

    # Fail with a readable message instead of a TypeError from os.path.basename.
    if not video_path:
        raise ValueError('No input video was provided; upload a video before running.')

    input_ns = argparse.Namespace()
    input_ns.video_path = video_path
    input_ns.video_name = os.path.basename(input_ns.video_path).replace('.mp4', '').replace('.gif', '')
    input_ns.preprocess_name = preprocess_name

    input_ns.batch_size = batch_size
    input_ns.batch_size_vae = batch_size_vae

    input_ns.cond_step_start = cond_step_start
    input_ns.controlnet_conditioning_scale = controlnet_conditioning_scale
    input_ns.controlnet_guidance_end = controlnet_guidance_end
    input_ns.controlnet_guidance_start = controlnet_guidance_start

    input_ns.give_control_inversion = give_control_inversion

    input_ns.grid_size = grid_size
    input_ns.sample_size = sample_size
    input_ns.pad = pad
    input_ns.guidance_scale = guidance_scale
    input_ns.inversion_prompt = inversion_prompt

    input_ns.is_ddim_inversion = is_ddim_inversion
    input_ns.is_shuffle = is_shuffle

    input_ns.negative_prompts = negative_prompts
    input_ns.num_inference_steps = num_inference_steps
    input_ns.num_inversion_step = num_inversion_step
    input_ns.positive_prompts = positive_prompts
    input_ns.save_folder = save_folder

    input_ns.seed = seed
    input_ns.model_id = const.MODEL_IDS[model_id]

    diffusers_model_path = os.path.join(const.CWD, 'CIVIT_AI', 'diffusers_models')
    os.makedirs(diffusers_model_path, exist_ok=True)

    # A model id whose string form is 'None' means no CivitAI model is installed.
    if str(input_ns.model_id) != 'None':
        input_ns.model_id = install_civitai_model(input_ns.model_id)

    device = init_device()
    input_ns = init_paths(input_ns)

    input_ns.image_pil_list = vgu.prepare_video_to_grid(input_ns.video_path, input_ns.sample_size, input_ns.grid_size, input_ns.pad)

    print(input_ns.video_path)
    # From here on sample_size is the number of grids actually produced.
    input_ns.sample_size = len(input_ns.image_pil_list)
    print(f'Frame count: {len(input_ns.image_pil_list)}')

    # A '-' in the conditioning scale selects the multi-ControlNet pipeline.
    # NOTE(review): presumably only reached when the scale is passed as an
    # 'a-b' style string rather than a single number - confirm with callers.
    is_multi_controlnet = '-' in str(input_ns.controlnet_conditioning_scale)
    controlnet_class = RAVE_MultiControlNet if is_multi_controlnet else RAVE

    CN = controlnet_class(device)

    CN.init_models(input_ns.hf_cn_path, input_ns.hf_path, input_ns.preprocess_name, input_ns.model_id)

    input_dict = vars(input_ns)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(input_dict)
    # The PIL frames are left out of the dict that is dumped to YAML.
    yaml_dict = {k: v for k, v in input_dict.items() if k != 'image_pil_list'}

    start_time = datetime.datetime.now()
    if is_multi_controlnet:
        res_vid, control_vid_1, control_vid_2 = CN(input_dict)
        control_vids = [control_vid_1, control_vid_2]
    else:
        res_vid, control_vid = CN(input_dict)
        control_vids = [control_vid]
    end_time = datetime.datetime.now()

    save_name = f"{'-'.join(input_ns.positive_prompts.split())}_cstart-{input_ns.controlnet_guidance_start}_gs-{input_ns.guidance_scale}_pre-{'-'.join((input_ns.preprocess_name.replace('-','+').split('_')))}_cscale-{input_ns.controlnet_conditioning_scale}_grid-{input_ns.grid_size}_pad-{input_ns.pad}_model-{os.path.basename(input_ns.model_id)}"
    result_path = os.path.join(input_ns.save_path, f'{save_name}.gif')
    res_vid[0].save(result_path, save_all=True, append_images=res_vid[1:], loop=10000)

    # The first control video keeps the 'control_' name in both modes, so the
    # multi-ControlNet branch no longer hits an undefined 'control_vid'.
    # Additional control videos get a numbered prefix.
    control_paths = []
    for position, frames in enumerate(control_vids, start=1):
        prefix = 'control' if position == 1 else f'control_{position}'
        control_path = os.path.join(input_ns.save_path, f'{prefix}_{save_name}.gif')
        frames[0].save(control_path, save_all=True, append_images=frames[1:], optimize=False, loop=10000)
        control_paths.append(control_path)

    yaml_dict['total_time'] = (end_time - start_time).total_seconds()
    yaml_dict['total_number_of_frames'] = len(res_vid)
    yaml_dict['sec_per_frame'] = yaml_dict['total_time'] / yaml_dict['total_number_of_frames']
    with open(os.path.join(input_ns.save_path, 'config.yaml'), 'w') as yaml_file:
        yaml.dump(yaml_dict, yaml_file)

    return result_path, control_paths[0]
|
167 |
+
|
168 |
+
|
169 |
+
def output_video_fn(video_path, text_prompt):
    """Map an example input video to its matching example output video.

    The file name of ``video_path`` has every 'input' replaced by 'output'
    and is looked up in the ``example_videos`` folder next to this file.

    Args:
        video_path: path of the example input video.
        text_prompt: prompt shown with the example; only printed.

    Returns:
        str: path of the corresponding output video.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    output_name = os.path.basename(video_path).replace('input', 'output')
    video_path = os.path.join(here, "example_videos", output_name)
    print(video_path, text_prompt)
    return video_path
|
174 |
+
|
175 |
+
# Gradio UI: upload/preview and cached examples in the left column; results,
# prompts and settings in the right column.
# NOTE(review): container nesting was reconstructed from a flattened diff
# view - confirm the layout against the deployed app.
block = gr.Blocks().queue()
with block:
    with gr.Row():
        gr.Markdown('## RAVE: Randomized Noise Shuffling for Fast and Consistent Video Editing with Diffusion Models')
    with gr.Row():
        with gr.Column():
            with gr.Row():
                input_path = gr.File(label='Upload Input Video', file_types=['.mp4'], scale=1)

                # Read-only preview of the uploaded file. It has its own name
                # so it no longer shares 'inputs' with the click-input list.
                input_video = gr.Video(label='Input Video',
                                       format='mp4',
                                       visible=True,
                                       interactive=False,
                                       scale=5)
                input_path.upload(lambda x: x, inputs=[input_path], outputs=[input_video])

            with gr.Row():
                example_input = gr.Video(label='Input Example',
                                         format='mp4',
                                         visible=True,
                                         interactive=False)

                example_output = gr.Video(label='Output Example',
                                          format='mp4',
                                          visible=True,
                                          interactive=False)
            ex_prompt = gr.Textbox(label='Text Prompt', interactive=False)
            with gr.Row():
                # Prompt shown for each bundled example, keyed by the number
                # in the exp_input_<n>.mp4 file name.
                ex_prompt_dict = {
                    '1': "A black panther",
                    '2': "A medieval knight",
                    '3': "Swarovski blue crystal swan",
                    '4': "Switzerland SBB CFF FFS train",
                    '5': "White cupcakes, moving on the table",
                }
                example_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "example_videos")
                ex_list = [
                    [os.path.join(example_dir, f"exp_input_{i}.mp4"), ex_prompt_dict[str(i)]]
                    for i in range(1, 6)
                ]

                ex = gr.Examples(
                    examples=ex_list,
                    inputs=[example_input, ex_prompt],
                    outputs=example_output,
                    fn=output_video_fn,
                    cache_examples=True,
                )

        with gr.Column():
            with gr.Row():
                # run() returns GIF paths, so both results use Image components.
                result_video = gr.Image(label='Edited Video',
                                        interactive=False)
                control_video = gr.Image(label='Control Video',
                                         interactive=False)

            with gr.Row():
                preprocess_list = ['depth_zoe', 'lineart_realistic', 'lineart_standard', 'softedge_hed']
                preprocess_name = gr.Dropdown(preprocess_list,
                                              label='Control type',
                                              value='depth_zoe')
                guidance_scale = gr.Slider(label='Guidance scale',
                                           minimum=0,
                                           maximum=40,
                                           step=0.1,
                                           value=7.5)
            with gr.Row():
                seed = gr.Slider(label='Seed',
                                 minimum=0,
                                 maximum=2147483647,
                                 step=1,
                                 value=0,
                                 randomize=True)
            with gr.Row():
                positive_prompts = gr.Textbox(label='Positive prompts')
                negative_prompts = gr.Textbox(label='Negative prompts')
            run_button = gr.Button(value='Run All')
            with gr.Accordion('Configuration',
                              open=False):
                with gr.Row():
                    controlnet_conditioning_scale = gr.Slider(label='ControlNet conditioning scale',
                                                              minimum=0.0,
                                                              maximum=1.0,
                                                              value=1.0,
                                                              step=0.01)
                    controlnet_guidance_end = gr.Slider(label='ControlNet guidance end',
                                                        minimum=0.0,
                                                        maximum=1.0,
                                                        value=1.0,
                                                        step=0.01)
                    controlnet_guidance_start = gr.Slider(label='ControlNet guidance start',
                                                          minimum=0.0,
                                                          maximum=1.0,
                                                          value=0.0,
                                                          step=0.01)

                with gr.Row():
                    grid_size = gr.Slider(label='Grid size (n x n)',
                                          minimum=2,
                                          maximum=3,
                                          value=3,
                                          step=1)
                    sample_size = gr.Slider(label='Number of grids',
                                            minimum=1,
                                            maximum=10,
                                            value=2,
                                            step=1)
                    pad = gr.Slider(label='Pad',
                                    minimum=1,
                                    maximum=10,
                                    value=1,
                                    step=1)

    # The order must match the positional order that run() reads.
    inputs = [input_path, preprocess_name, controlnet_conditioning_scale, controlnet_guidance_end, controlnet_guidance_start, grid_size, sample_size, pad, guidance_scale, negative_prompts, positive_prompts, seed]

    run_button.click(fn=run,
                     inputs=inputs,
                     outputs=[result_video, control_video])

if __name__ == "__main__":

    block.launch(share=True)
|
app_.py
DELETED
@@ -1,319 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import cv2
|
3 |
-
import os
|
4 |
-
import torch
|
5 |
-
import argparse
|
6 |
-
import os
|
7 |
-
import sys
|
8 |
-
import yaml
|
9 |
-
import datetime
|
10 |
-
sys.path.append(os.path.dirname(os.getcwd()))
|
11 |
-
from pipelines.sd_controlnet_rave import RAVE
|
12 |
-
from pipelines.sd_multicontrolnet_rave import RAVE_MultiControlNet
|
13 |
-
import shutil
|
14 |
-
import subprocess
|
15 |
-
import utils.constants as const
|
16 |
-
import utils.video_grid_utils as vgu
|
17 |
-
import warnings
|
18 |
-
warnings.filterwarnings("ignore")
|
19 |
-
import pprint
|
20 |
-
import glob
|
21 |
-
|
22 |
-
|
23 |
-
def init_device():
|
24 |
-
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
|
25 |
-
device = torch.device(device_name)
|
26 |
-
return device
|
27 |
-
|
28 |
-
def init_paths(input_ns):
|
29 |
-
if input_ns.save_folder == None or input_ns.save_folder == '':
|
30 |
-
input_ns.save_folder = input_ns.video_name
|
31 |
-
else:
|
32 |
-
input_ns.save_folder = os.path.join(input_ns.save_folder, input_ns.video_name)
|
33 |
-
save_dir = os.path.join(const.OUTPUT_PATH, input_ns.save_folder)
|
34 |
-
os.makedirs(save_dir, exist_ok=True)
|
35 |
-
save_idx = max([int(x[-5:]) for x in os.listdir(save_dir)])+1 if os.listdir(save_dir) != [] else 0
|
36 |
-
input_ns.save_path = os.path.join(save_dir, f'{input_ns.positive_prompts}-{str(save_idx).zfill(5)}')
|
37 |
-
|
38 |
-
|
39 |
-
if '-' in input_ns.preprocess_name:
|
40 |
-
input_ns.hf_cn_path = [const.PREPROCESSOR_DICT[i] for i in input_ns.preprocess_name.split('-')]
|
41 |
-
else:
|
42 |
-
input_ns.hf_cn_path = const.PREPROCESSOR_DICT[input_ns.preprocess_name]
|
43 |
-
input_ns.hf_path = "runwayml/stable-diffusion-v1-5"
|
44 |
-
|
45 |
-
input_ns.inverse_path = os.path.join(const.GENERATED_DATA_PATH, 'inverses', input_ns.video_name, f'{input_ns.preprocess_name}_{input_ns.model_id}_{input_ns.grid_size}x{input_ns.grid_size}_{input_ns.pad}')
|
46 |
-
input_ns.control_path = os.path.join(const.GENERATED_DATA_PATH, 'controls', input_ns.video_name, f'{input_ns.preprocess_name}_{input_ns.grid_size}x{input_ns.grid_size}_{input_ns.pad}')
|
47 |
-
os.makedirs(input_ns.control_path, exist_ok=True)
|
48 |
-
os.makedirs(input_ns.inverse_path, exist_ok=True)
|
49 |
-
os.makedirs(input_ns.save_path, exist_ok=True)
|
50 |
-
return input_ns
|
51 |
-
|
52 |
-
def install_civitai_model(model_id):
|
53 |
-
full_path = os.path.join(const.CWD, 'CIVIT_AI', 'diffusers_models', model_id, '*')
|
54 |
-
if len(glob.glob(full_path)) > 0:
|
55 |
-
full_path = glob.glob(full_path)[0]
|
56 |
-
return full_path
|
57 |
-
install_path = os.path.join(const.CWD, 'CIVIT_AI', 'safetensors')
|
58 |
-
install_path_model = os.path.join(const.CWD, 'CIVIT_AI', 'safetensors', model_id)
|
59 |
-
diffusers_path = os.path.join(const.CWD, 'CIVIT_AI', 'diffusers_models', model_id)
|
60 |
-
convert_py_path = os.path.join(const.CWD, 'CIVIT_AI', 'convert.py')
|
61 |
-
os.makedirs(install_path, exist_ok=True)
|
62 |
-
os.makedirs(diffusers_path, exist_ok=True)
|
63 |
-
subprocess.run(f'wget https://civitai.com/api/download/models/{model_id} --content-disposition --directory {install_path_model}'.split())
|
64 |
-
model_name = glob.glob(os.path.join(install_path, model_id, '*'))[0]
|
65 |
-
model_name2 = os.path.basename(glob.glob(os.path.join(install_path, model_id, '*'))[0]).replace('.safetensors', '')
|
66 |
-
diffusers_path_model_name = os.path.join(const.CWD, 'CIVIT_AI', 'diffusers_models', model_id, model_name2)
|
67 |
-
print(model_name)
|
68 |
-
subprocess.run(f'python {convert_py_path} --checkpoint_path {model_name} --dump_path {diffusers_path_model_name} --from_safetensors'.split())
|
69 |
-
subprocess.run(f'rm -rf {install_path}'.split())
|
70 |
-
return diffusers_path_model_name
|
71 |
-
|
72 |
-
def run(*args):
|
73 |
-
list_of_inputs = [x for x in args]
|
74 |
-
input_ns = argparse.Namespace(**{})
|
75 |
-
input_ns.video_path = list_of_inputs[0] # video_path
|
76 |
-
input_ns.video_name = os.path.basename(input_ns.video_path).replace('.mp4', '').replace('.gif', '')
|
77 |
-
input_ns.preprocess_name = list_of_inputs[1]
|
78 |
-
|
79 |
-
input_ns.batch_size = list_of_inputs[2]
|
80 |
-
input_ns.batch_size_vae = list_of_inputs[3]
|
81 |
-
|
82 |
-
input_ns.cond_step_start = list_of_inputs[4]
|
83 |
-
input_ns.controlnet_conditioning_scale = list_of_inputs[5]
|
84 |
-
input_ns.controlnet_guidance_end = list_of_inputs[6]
|
85 |
-
input_ns.controlnet_guidance_start = list_of_inputs[7]
|
86 |
-
|
87 |
-
input_ns.give_control_inversion = list_of_inputs[8]
|
88 |
-
|
89 |
-
input_ns.grid_size = list_of_inputs[9]
|
90 |
-
input_ns.sample_size = list_of_inputs[10]
|
91 |
-
input_ns.pad = list_of_inputs[11]
|
92 |
-
input_ns.guidance_scale = list_of_inputs[12]
|
93 |
-
input_ns.inversion_prompt = list_of_inputs[13]
|
94 |
-
|
95 |
-
input_ns.is_ddim_inversion = list_of_inputs[14]
|
96 |
-
input_ns.is_shuffle = list_of_inputs[15]
|
97 |
-
|
98 |
-
input_ns.negative_prompts = list_of_inputs[16]
|
99 |
-
input_ns.num_inference_steps = list_of_inputs[17]
|
100 |
-
input_ns.num_inversion_step = list_of_inputs[18]
|
101 |
-
input_ns.positive_prompts = list_of_inputs[19]
|
102 |
-
input_ns.save_folder = list_of_inputs[20]
|
103 |
-
|
104 |
-
input_ns.seed = list_of_inputs[21]
|
105 |
-
input_ns.model_id = const.MODEL_IDS[list_of_inputs[22]]
|
106 |
-
# input_ns.width = list_of_inputs[23]
|
107 |
-
# input_ns.height = list_of_inputs[24]
|
108 |
-
# input_ns.original_size = list_of_inputs[25]
|
109 |
-
diffusers_model_path = os.path.join(const.CWD, 'CIVIT_AI', 'diffusers_models')
|
110 |
-
os.makedirs(diffusers_model_path, exist_ok=True)
|
111 |
-
if 'model_id' not in list(input_ns.__dict__.keys()):
|
112 |
-
input_ns.model_id = "None"
|
113 |
-
|
114 |
-
if str(input_ns.model_id) != 'None':
|
115 |
-
input_ns.model_id = install_civitai_model(input_ns.model_id)
|
116 |
-
|
117 |
-
|
118 |
-
device = init_device()
|
119 |
-
input_ns = init_paths(input_ns)
|
120 |
-
|
121 |
-
input_ns.image_pil_list = vgu.prepare_video_to_grid(input_ns.video_path, input_ns.sample_size, input_ns.grid_size, input_ns.pad)
|
122 |
-
|
123 |
-
print(input_ns.video_path)
|
124 |
-
input_ns.sample_size = len(input_ns.image_pil_list)
|
125 |
-
print(f'Frame count: {len(input_ns.image_pil_list)}')
|
126 |
-
|
127 |
-
controlnet_class = RAVE_MultiControlNet if '-' in str(input_ns.controlnet_conditioning_scale) else RAVE
|
128 |
-
|
129 |
-
|
130 |
-
CN = controlnet_class(device)
|
131 |
-
|
132 |
-
CN.init_models(input_ns.hf_cn_path, input_ns.hf_path, input_ns.preprocess_name, input_ns.model_id)
|
133 |
-
|
134 |
-
input_dict = vars(input_ns)
|
135 |
-
pp = pprint.PrettyPrinter(indent=4)
|
136 |
-
pp.pprint(input_dict)
|
137 |
-
yaml_dict = {k:v for k,v in input_dict.items() if k != 'image_pil_list'}
|
138 |
-
|
139 |
-
start_time = datetime.datetime.now()
|
140 |
-
if '-' in str(input_ns.controlnet_conditioning_scale):
|
141 |
-
res_vid, control_vid_1, control_vid_2 = CN(input_dict)
|
142 |
-
else:
|
143 |
-
res_vid, control_vid = CN(input_dict)
|
144 |
-
end_time = datetime.datetime.now()
|
145 |
-
save_name = f"{'-'.join(input_ns.positive_prompts.split())}_cstart-{input_ns.controlnet_guidance_start}_gs-{input_ns.guidance_scale}_pre-{'-'.join((input_ns.preprocess_name.replace('-','+').split('_')))}_cscale-{input_ns.controlnet_conditioning_scale}_grid-{input_ns.grid_size}_pad-{input_ns.pad}_model-{os.path.basename(input_ns.model_id)}"
|
146 |
-
res_vid[0].save(os.path.join(input_ns.save_path, f'{save_name}.gif'), save_all=True, append_images=res_vid[1:], loop=10000)
|
147 |
-
control_vid[0].save(os.path.join(input_ns.save_path, f'control_{save_name}.gif'), save_all=True, append_images=control_vid[1:], optimize=False, loop=10000)
|
148 |
-
|
149 |
-
yaml_dict['total_time'] = (end_time - start_time).total_seconds()
|
150 |
-
yaml_dict['total_number_of_frames'] = len(res_vid)
|
151 |
-
yaml_dict['sec_per_frame'] = yaml_dict['total_time']/yaml_dict['total_number_of_frames']
|
152 |
-
with open(os.path.join(input_ns.save_path, 'config.yaml'), 'w') as yaml_file:
|
153 |
-
yaml.dump(yaml_dict, yaml_file)
|
154 |
-
|
155 |
-
return os.path.join(input_ns.save_path, f'{save_name}.gif'), os.path.join(input_ns.save_path, f'control_{save_name}.gif')
|
156 |
-
|
157 |
-
|
158 |
-
def output_video_fn(video_path):
|
159 |
-
fold_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "example_videos")
|
160 |
-
video_path = os.path.join(fold_path, os.path.basename(video_path).replace('input', 'output'))
|
161 |
-
return video_path
|
162 |
-
|
163 |
-
block = gr.Blocks().queue()
|
164 |
-
with block:
|
165 |
-
with gr.Row():
|
166 |
-
gr.Markdown('## RAVE')
|
167 |
-
with gr.Row():
|
168 |
-
with gr.Column():
|
169 |
-
with gr.Row():
|
170 |
-
input_path = gr.File(label='Upload Input Video', file_types=['.mp4'], scale=1)
|
171 |
-
|
172 |
-
inputs = gr.Video(label='Input Video',
|
173 |
-
format='mp4',
|
174 |
-
visible=True,
|
175 |
-
interactive=False,
|
176 |
-
scale=5)
|
177 |
-
input_path.upload(lambda x:x, inputs=[input_path], outputs=[inputs])
|
178 |
-
|
179 |
-
with gr.Row():
|
180 |
-
positive_prompts = gr.Textbox(label='Positive prompts')
|
181 |
-
negative_prompts = gr.Textbox(label='Negative prompts')
|
182 |
-
with gr.Row():
|
183 |
-
preprocess_name = gr.Dropdown(const.PREPROCESSOR_DICT.keys(),
|
184 |
-
label='Control type',
|
185 |
-
value='depth_zoe')
|
186 |
-
guidance_scale = gr.Slider(label='Guidance scale',
|
187 |
-
minimum=0,
|
188 |
-
maximum=40,
|
189 |
-
step=0.1,
|
190 |
-
value=7.5)
|
191 |
-
|
192 |
-
with gr.Row():
|
193 |
-
inversion_prompt = gr.Textbox(label='Inversion prompt')
|
194 |
-
seed = gr.Slider(label='Seed',
|
195 |
-
minimum=0,
|
196 |
-
maximum=2147483647,
|
197 |
-
step=1,
|
198 |
-
value=0,
|
199 |
-
randomize=True)
|
200 |
-
|
201 |
-
with gr.Row():
|
202 |
-
model_id = gr.Dropdown(const.MODEL_IDS,
|
203 |
-
label='Model id',
|
204 |
-
value='SD 1.5')
|
205 |
-
save_folder = gr.Textbox(label='Save folder')
|
206 |
-
|
207 |
-
run_button = gr.Button(value='Run All')
|
208 |
-
with gr.Accordion('Configuration',
|
209 |
-
open=False):
|
210 |
-
with gr.Row():
|
211 |
-
batch_size = gr.Slider(label='Batch size',
|
212 |
-
minimum=1,
|
213 |
-
maximum=36,
|
214 |
-
value=4,
|
215 |
-
step=1)
|
216 |
-
batch_size_vae = gr.Slider(label='Batch size of VAE',
|
217 |
-
minimum=1,
|
218 |
-
maximum=36,
|
219 |
-
value=1,
|
220 |
-
step=1)
|
221 |
-
|
222 |
-
with gr.Row():
|
223 |
-
is_ddim_inversion = gr.Checkbox(
|
224 |
-
label='Use DDIM Inversion',
|
225 |
-
value=True)
|
226 |
-
is_shuffle = gr.Checkbox(
|
227 |
-
label='Shuffle',
|
228 |
-
value=True)
|
229 |
-
|
230 |
-
with gr.Row():
|
231 |
-
num_inference_steps = gr.Slider(label='Number of inference steps',
|
232 |
-
minimum=1,
|
233 |
-
maximum=100,
|
234 |
-
value=20,
|
235 |
-
step=1)
|
236 |
-
num_inversion_step = gr.Slider(label='Number of inversion steps',
|
237 |
-
minimum=1,
|
238 |
-
maximum=100,
|
239 |
-
value=20,
|
240 |
-
step=1)
|
241 |
-
cond_step_start = gr.Slider(label='Conditioning step start',
|
242 |
-
minimum=0,
|
243 |
-
maximum=1.0,
|
244 |
-
value=0.0,
|
245 |
-
step=0.1)
|
246 |
-
|
247 |
-
with gr.Row():
|
248 |
-
controlnet_conditioning_scale = gr.Slider(label='ControlNet conditioning scale',
|
249 |
-
minimum=0.0,
|
250 |
-
maximum=1.0,
|
251 |
-
value=1.0,
|
252 |
-
step=0.01)
|
253 |
-
controlnet_guidance_end = gr.Slider(label='ControlNet guidance end',
|
254 |
-
minimum=0.0,
|
255 |
-
maximum=1.0,
|
256 |
-
value=1.0,
|
257 |
-
step=0.01)
|
258 |
-
controlnet_guidance_start = gr.Slider(label='ControlNet guidance start',
|
259 |
-
minimum=0.0,
|
260 |
-
maximum=1.0,
|
261 |
-
value=0.0,
|
262 |
-
step=0.01)
|
263 |
-
give_control_inversion = gr.Checkbox(
|
264 |
-
label='Give control during inversion',
|
265 |
-
value=True)
|
266 |
-
|
267 |
-
with gr.Row():
|
268 |
-
grid_size = gr.Slider(label='Grid size',
|
269 |
-
minimum=1,
|
270 |
-
maximum=10,
|
271 |
-
value=3,
|
272 |
-
step=1)
|
273 |
-
sample_size = gr.Slider(label='Sample size',
|
274 |
-
minimum=-1,
|
275 |
-
maximum=100,
|
276 |
-
value=-1,
|
277 |
-
step=1)
|
278 |
-
pad = gr.Slider(label='Pad',
|
279 |
-
minimum=1,
|
280 |
-
maximum=10,
|
281 |
-
value=1,
|
282 |
-
step=1)
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
with gr.Column():
|
287 |
-
with gr.Row():
|
288 |
-
result_video = gr.Image(label='Edited Video',
|
289 |
-
interactive=False)
|
290 |
-
control_video = gr.Image(label='Control Video',
|
291 |
-
interactive=False)
|
292 |
-
|
293 |
-
with gr.Row():
|
294 |
-
example_input = gr.Video(label='Input Example',
|
295 |
-
format='mp4',
|
296 |
-
visible=True,
|
297 |
-
interactive=False)
|
298 |
-
example_output = gr.Video(label='Output Example',
|
299 |
-
format='mp4',
|
300 |
-
visible=True,
|
301 |
-
interactive=False)
|
302 |
-
# input(os.path.join(os.path.dirname(os.path.abspath(__file__)), "example_videos", "exp_input_1.mp4"))
|
303 |
-
gr.Markdown("## Video Examples")
|
304 |
-
gr.Examples(
|
305 |
-
examples=[os.path.join(os.path.dirname(os.path.abspath(__file__)), "example_videos", "exp_input_1.mp4")],
|
306 |
-
inputs=example_input,
|
307 |
-
outputs=example_output,
|
308 |
-
fn=output_video_fn,
|
309 |
-
cache_examples=True,)
|
310 |
-
|
311 |
-
inputs = [input_path, preprocess_name, batch_size, batch_size_vae, cond_step_start, controlnet_conditioning_scale, controlnet_guidance_end, controlnet_guidance_start, give_control_inversion, grid_size, sample_size, pad, guidance_scale, inversion_prompt, is_ddim_inversion, is_shuffle, negative_prompts, num_inference_steps, num_inversion_step, positive_prompts, save_folder, seed, model_id]
|
312 |
-
|
313 |
-
run_button.click(fn=run,
|
314 |
-
inputs=inputs,
|
315 |
-
outputs=[result_video, control_video])
|
316 |
-
|
317 |
-
if __name__ == "__main__":
|
318 |
-
|
319 |
-
block.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
example_videos/exp_input_1.mp4
CHANGED
Binary files a/example_videos/exp_input_1.mp4 and b/example_videos/exp_input_1.mp4 differ
|
|
example_videos/exp_input_2.mp4
ADDED
Binary file (107 kB). View file
|
|
example_videos/exp_input_3.mp4
ADDED
Binary file (195 kB). View file
|
|
example_videos/exp_input_4.mp4
ADDED
Binary file (184 kB). View file
|
|
example_videos/exp_input_5.mp4
ADDED
Binary file (75.9 kB). View file
|
|
example_videos/exp_output_1.mp4
CHANGED
Binary files a/example_videos/exp_output_1.mp4 and b/example_videos/exp_output_1.mp4 differ
|
|
example_videos/exp_output_2.mp4
ADDED
Binary file (244 kB). View file
|
|
example_videos/exp_output_3.mp4
ADDED
Binary file (251 kB). View file
|
|
example_videos/exp_output_4.mp4
ADDED
Binary file (444 kB). View file
|
|
example_videos/exp_output_5.mp4
ADDED
Binary file (73.5 kB). View file
|
|
gradio_cached_examples/59/Output Example/994f36ecf77e57c9b298/exp_output_1.mp4
DELETED
Binary file (298 kB)
|
|
gradio_cached_examples/59/log.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
Output Example,flag,username,timestamp
|
2 |
-
"{""video"":{""path"":""gradio_cached_examples/59/Output Example/994f36ecf77e57c9b298/exp_output_1.mp4"",""url"":null,""size"":null,""orig_name"":""exp_output_1.mp4"",""mime_type"":null},""subtitles"":null}",,,2023-12-17 12:08:44.852661
|
|
|
|
|
|