svjack committed on
Commit
b57a513
·
verified ·
1 Parent(s): dd470cd

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  assets/13.gif filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  assets/13.gif filter=lfs diff=lfs merge=lfs -text
37
+ xingqiu_images/layer_014.png filter=lfs diff=lfs merge=lfs -text
38
+ xingqiu_images/layer_024.png filter=lfs diff=lfs merge=lfs -text
39
+ xingqiu_images/layer_034.png filter=lfs diff=lfs merge=lfs -text
40
+ xingqiu_images/layer_044.png filter=lfs diff=lfs merge=lfs -text
41
+ xingqiu_images/layer_054.png filter=lfs diff=lfs merge=lfs -text
42
+ xingqiu_images/layer_064.png filter=lfs diff=lfs merge=lfs -text
43
+ xingqiu_images/layer_074.png filter=lfs diff=lfs merge=lfs -text
44
+ xingqiu_images/layer_084.png filter=lfs diff=lfs merge=lfs -text
45
+ xingqiu_images/layer_094.png filter=lfs diff=lfs merge=lfs -text
run_videos.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from omegaconf import OmegaConf
4
+ import torch
5
+ from scripts.evaluation.funcs import load_model_checkpoint, save_videos, batch_ddim_sampling, get_latent_z
6
+ from utils.utils import instantiate_from_config
7
+ from huggingface_hub import hf_hub_download
8
+ from einops import repeat
9
+ import torchvision.transforms as transforms
10
+ from pytorch_lightning import seed_everything
11
+ from einops import rearrange
12
+ import argparse
13
+ import glob
14
+ from PIL import Image
15
+ import numpy as np
16
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
17
+
18
class Image2Video():
    """Generate a short interpolated video from one or two key-frame images.

    On construction the ToonCrafter checkpoint is downloaded if it is not
    already present locally, the inference config is loaded and ``gpu_num``
    model instances are created. ``get_image`` then runs DDIM sampling for a
    start frame (and optional end frame) and writes the result as an mp4 file
    inside ``result_dir``.
    """

    def __init__(self, result_dir='./tmp/', gpu_num=1, resolution='256_256') -> None:
        """Prepare the output directory and load the model(s).

        Args:
            result_dir: Directory that receives the generated videos; created
                (including missing parents) when it does not exist.
            gpu_num: Number of model instances to instantiate.
            resolution: ``"<height>_<width>"`` string. The width part selects
                the checkpoint directory and the inference config file.

        Raises:
            AssertionError: If the checkpoint file is missing after the
                download step.
        """
        parts = resolution.split('_')
        self.resolution = (int(parts[0]), int(parts[1]))  # (h, w)
        self.download_model()

        self.result_dir = result_dir
        # makedirs also handles nested paths and an already-existing directory.
        os.makedirs(self.result_dir, exist_ok=True)

        ckpt_path = 'checkpoints/tooncrafter_' + parts[1] + '_interp_v1/model.ckpt'
        config_file = 'configs/inference_' + parts[1] + '_v1.0.yaml'
        config = OmegaConf.load(config_file)
        model_config = config.pop("model", OmegaConf.create())
        model_config['params']['unet_config']['params']['use_checkpoint'] = False

        # Check the checkpoint once, before paying for model instantiation.
        print(ckpt_path)
        assert os.path.exists(ckpt_path), "Error: checkpoint Not Found!"

        model_list = []
        for _ in range(gpu_num):
            model = instantiate_from_config(model_config)
            model = load_model_checkpoint(model, ckpt_path)
            model.eval()
            model_list.append(model)
        self.model_list = model_list
        self.save_fps = 8

    @staticmethod
    def _load_rgb(path):
        """Read an image file into an (h, w, 3) uint8 array, closing the file."""
        with Image.open(path) as img:
            # Normalise palette / grayscale / RGBA inputs to exactly 3 channels
            # so the later permute(2, 0, 1) always sees an h-w-c array.
            return np.array(img.convert('RGB'))

    @staticmethod
    def _video_stem(img_name, prompt):
        """Pick the output file stem: the image name, else a sanitised prompt."""
        if img_name:
            return img_name
        prompt_str = prompt.replace("/", "_slash_").replace(" ", "_")[:40]
        return prompt_str if prompt_str else 'empty_prompt'

    def get_image(self, image, prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, image2=None):
        """Sample a video that starts at ``image`` and, if given, ends at ``image2``.

        Args:
            image: Start frame, either a file path or an h-w-c array.
            prompt: Text prompt used for conditioning.
            steps: Number of DDIM steps; values above 60 are clamped to 60.
            cfg_scale: Classifier-free guidance scale passed to the sampler.
            eta: DDIM eta passed to the sampler.
            fs: Integer passed to the model as the ``"fs"`` condition.
            seed: Seed handed to ``seed_everything``.
            image2: Optional end frame, either a file path or an h-w-c array.

        Returns:
            ``os.path.join(self.result_dir, "<stem>.mp4")`` where the stem is
            the start image's file name without extension, or a sanitised
            prompt when ``image`` was not given as a path.
        """
        img_name = ""
        if isinstance(image, str):
            # splitext keeps dots inside the name ("a.001.png" -> "a.001"),
            # so differently numbered frames do not collapse to one file.
            img_name = os.path.splitext(os.path.basename(image))[0]
            image = self._load_rgb(image)
        if isinstance(image2, str):
            image2 = self._load_rgb(image2)

        seed_everything(seed)
        transform = transforms.Compose([
            transforms.Resize(min(self.resolution)),
            transforms.CenterCrop(self.resolution),
        ])
        torch.cuda.empty_cache()
        print('start:', prompt, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
        start = time.time()
        gpu_id = 0
        steps = min(steps, 60)
        model = self.model_list[gpu_id]
        model = model.half().cuda()
        try:
            batch_size = 1
            channels = model.model.diffusion_model.out_channels
            frames = model.temporal_length
            h, w = self.resolution[0] // 8, self.resolution[1] // 8
            noise_shape = [batch_size, channels, frames, h, w]

            with torch.no_grad(), torch.cuda.amp.autocast():
                text_emb = model.get_learned_conditioning([prompt])
                img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().half().to(model.device)
                img_tensor = (img_tensor / 255. - 0.5) * 2  # [0, 255] -> [-1, 1]
                image_tensor_resized = transform(img_tensor)  # 3,h,w
                videos = image_tensor_resized.unsqueeze(0).unsqueeze(2)  # bc1hw
                videos = repeat(videos, 'b c t h w -> b c (repeat t) h w', repeat=frames // 2)

                if image2 is not None:
                    img_tensor2 = torch.from_numpy(image2).permute(2, 0, 1).float().half().to(model.device)
                    img_tensor2 = (img_tensor2 / 255. - 0.5) * 2
                    image_tensor_resized2 = transform(img_tensor2)  # 3,h,w
                    videos2 = image_tensor_resized2.unsqueeze(0).unsqueeze(2)  # bc1hw
                    videos2 = repeat(videos2, 'b c t h w -> b c (repeat t) h w', repeat=frames // 2)
                    videos = torch.cat([videos, videos2], dim=2)

                z, hs = self.get_latent_z_with_hidden_states(model, videos)
                # Only the first and last latent frames are kept as the
                # concat condition; everything in between stays zero.
                img_tensor_repeat = torch.zeros_like(z)
                img_tensor_repeat[:, :, :1, :, :] = z[:, :, :1, :, :]
                img_tensor_repeat[:, :, -1:, :, :] = z[:, :, -1:, :, :]

                cond_images = model.embedder(img_tensor.unsqueeze(0))  # blc
                img_emb = model.image_proj_model(cond_images)
                imtext_cond = torch.cat([text_emb, img_emb], dim=1)
                fs_tensor = torch.tensor([fs], dtype=torch.long, device=model.device)
                cond = {"c_crossattn": [imtext_cond], "fs": fs_tensor, "c_concat": [img_tensor_repeat]}

                batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale, hs=hs)
                if image2 is None:
                    # Without an end frame, drop the last entry along axis 3.
                    # NOTE(review): presumably the final frame -- confirm
                    # against the layout returned by batch_ddim_sampling.
                    batch_samples = batch_samples[:, :, :, :-1, ...]

            # Name the video after the input image path; fall back to the
            # prompt when the image was passed as an array and has no name.
            video_filename = self._video_stem(img_name, prompt)
            save_videos(batch_samples, self.result_dir, filenames=[video_filename], fps=self.save_fps)
            print(f"Saved in {video_filename}. Time used: {(time.time() - start):.2f} seconds")
        finally:
            # Move the model off the GPU even when sampling or saving fails.
            model.cpu()
        # Assumes save_videos writes "<filename>.mp4" into result_dir.
        video_filename += ".mp4"
        return os.path.join(self.result_dir, video_filename)

    def download_model(self):
        """Fetch the checkpoint for ``self.resolution[1]`` unless it exists locally."""
        REPO_ID = 'Doubiiu/ToonCrafter'
        filename_list = ['model.ckpt']
        ckpt_dir = './checkpoints/tooncrafter_' + str(self.resolution[1]) + '_interp_v1/'
        os.makedirs(ckpt_dir, exist_ok=True)
        for filename in filename_list:
            local_file = os.path.join(ckpt_dir, filename)
            if not os.path.exists(local_file):
                hf_hub_download(repo_id=REPO_ID, filename=filename, local_dir=ckpt_dir, local_dir_use_symlinks=False)

    def get_latent_z_with_hidden_states(self, model, videos):
        """Encode ``videos`` frame by frame with the model's first-stage encoder.

        Args:
            model: Object exposing ``first_stage_model.encode`` and
                ``get_first_stage_encoding``.
            videos: Tensor laid out as ``b c t h w``.

        Returns:
            ``(z, hidden_states_first_last)``: ``z`` is the detached latent
            rearranged back to ``b c t h w``; the second item holds, for each
            hidden state returned by the encoder, only its first and last
            time steps concatenated along the time axis.
        """
        b, c, t, h, w = videos.shape
        x = rearrange(videos, 'b c t h w -> (b t) c h w')
        encoder_posterior, hidden_states = model.first_stage_model.encode(x, return_hidden_states=True)

        hidden_states_first_last = []
        for hid in hidden_states:
            hid = rearrange(hid, '(b t) c h w -> b c t h w', t=t)
            hid_new = torch.cat([hid[:, :, 0:1], hid[:, :, -1:]], dim=2)
            hidden_states_first_last.append(hid_new)

        z = model.get_first_stage_encoding(encoder_posterior).detach()
        z = rearrange(z, '(b t) c h w -> b c t h w', b=b, t=t)
        return z, hidden_states_first_last
138
+
139
def build_arg_parser():
    """Create the command-line parser for the batch image-to-video script."""
    parser = argparse.ArgumentParser(description='Image to Video Conversion')
    parser.add_argument('--image_dir', type=str, required=True, help='Path to the directory containing input images')
    parser.add_argument('--prompt', type=str, required=True, help='Prompt for the video')
    parser.add_argument('--steps', type=int, default=50, help='Number of steps')
    parser.add_argument('--cfg_scale', type=float, default=7.5, help='CFG scale')
    parser.add_argument('--eta', type=float, default=1.0, help='Eta value')
    parser.add_argument('--fs', type=int, default=3, help='FS value')
    parser.add_argument('--seed', type=int, default=123, help='Seed value')
    return parser


def collect_frame_pairs(image_dir):
    """Return consecutive ``(start, end)`` PNG path pairs from ``image_dir``.

    Paths are sorted by name; fewer than two PNG files yields an empty list.
    """
    image_paths = sorted(glob.glob(os.path.join(image_dir, '*.png')))
    return list(zip(image_paths, image_paths[1:]))


def main(argv=None):
    """Interpolate between every pair of neighbouring images and join the clips.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = build_arg_parser()
    args = parser.parse_args(argv)

    # Validate the input before the (expensive) model download and load.
    frame_pairs = collect_frame_pairs(args.image_dir)
    if not frame_pairs:
        parser.error(f"need at least two .png images in {args.image_dir!r}")

    i2v = Image2Video("results", resolution="320_512")

    video_paths = []
    for img_path, img2_path in frame_pairs:
        video_path = i2v.get_image(img_path, args.prompt, args.steps, args.cfg_scale, args.eta, args.fs, args.seed, img2_path)
        video_paths.append(video_path)
        print('done', video_path)

    # Name the final video after the first image.
    first_image_name = os.path.splitext(os.path.basename(frame_pairs[0][0]))[0]
    final_video_path = os.path.join(i2v.result_dir, f"{first_image_name}_final.mp4")

    # Concatenate all generated videos in order.
    clips = [VideoFileClip(vp) for vp in video_paths]
    try:
        final_clip = concatenate_videoclips(clips, method="compose")
        final_clip.write_videofile(final_video_path, codec="libx264", fps=i2v.save_fps)
    finally:
        # Release the reader resources held by each source clip.
        for clip in clips:
            clip.close()

    print(f"Final video saved at {final_video_path}")


if __name__ == '__main__':
    main()
xingqiu_images/layer_014.png ADDED

Git LFS Details

  • SHA256: 719bffdc6b6f6860b3d79ef24c686a2f75bce1d3c98e2622b1351f0ca13a1bd4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
xingqiu_images/layer_024.png ADDED

Git LFS Details

  • SHA256: 10329b29196b28f3bc9814d6ecbd52d363785ab1d800ff8e90e2a372d2aec2aa
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
xingqiu_images/layer_034.png ADDED

Git LFS Details

  • SHA256: 1b32973975d50be81c6f0cff9a4a472f044b8b0d574919e5fc24c6b2fc2cf08a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
xingqiu_images/layer_044.png ADDED

Git LFS Details

  • SHA256: ddec0cf180b9317654ad93e5ff617bf88fb1fd92dd0941fbb0e7b7a8a55a7e8a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.63 MB
xingqiu_images/layer_054.png ADDED

Git LFS Details

  • SHA256: 180bf12cc8bd476f8846513ec5081977769ba869b8ff600566fd7fbf8a38bda4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.63 MB
xingqiu_images/layer_064.png ADDED

Git LFS Details

  • SHA256: b9bc2180e39f36de61b606f157e75f1acfd33a76b0381a7bfc8765c077c61822
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
xingqiu_images/layer_074.png ADDED

Git LFS Details

  • SHA256: 14bb12abab6d9261edc358083baac44d4552141a802ff98cb694cd7ff74adc40
  • Pointer size: 132 Bytes
  • Size of remote file: 1.6 MB
xingqiu_images/layer_084.png ADDED

Git LFS Details

  • SHA256: 14bb12abab6d9261edc358083baac44d4552141a802ff98cb694cd7ff74adc40
  • Pointer size: 132 Bytes
  • Size of remote file: 1.6 MB
xingqiu_images/layer_094.png ADDED

Git LFS Details

  • SHA256: 14bb12abab6d9261edc358083baac44d4552141a802ff98cb694cd7ff74adc40
  • Pointer size: 132 Bytes
  • Size of remote file: 1.6 MB