anshuln committed
Commit
97dba9c
1 Parent(s): 7d8a993

Delete src/generation.py

Files changed (1)
  1. src/generation.py +0 -128
src/generation.py DELETED
@@ -1,128 +0,0 @@
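- """Demo: generate short videos with the spatially aware text-to-video
- pipeline, constraining foreground objects to per-frame bounding-box masks."""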
- import os
- import sys
- sys.path.insert(1, os.path.join(sys.path[0], '..'))  # make the repo root importable for models.pipelines
- 
- import warnings
- 
- import torch
- import torchvision
- import torchvision.io as vision_io
- from PIL import Image
- 
- from models.pipelines import TextToVideoSDPipelineSpatialAware
- from diffusers.utils import export_to_video
- 
- warnings.filterwarnings("ignore")
- 
- OUTPUT_PATH = "/scr/demo"
- 
- def generate_video(pipe, overall_prompt, latents, get_latents=False, num_frames=24, num_inference_steps=50, fg_masks=None,
-                    fg_masked_latents=None, frozen_steps=0, frozen_prompt=None, custom_attention_mask=None, fg_prompt=None):
-     """Run the spatially aware pipeline; optionally also return the final latents."""
-     video_frames = pipe(overall_prompt, num_frames=num_frames, latents=latents, num_inference_steps=num_inference_steps, frozen_mask=fg_masks,
-                         frozen_steps=frozen_steps, latents_all_input=fg_masked_latents, frozen_prompt=frozen_prompt, custom_attention_mask=custom_attention_mask, fg_prompt=fg_prompt,
-                         make_attention_mask_2d=True, attention_mask_block_diagonal=True, height=320, width=576).frames
-     if get_latents:
-         video_latents = pipe(overall_prompt, num_frames=num_frames, latents=latents, num_inference_steps=num_inference_steps, output_type="latent").frames
-         return video_frames, video_latents
-     return video_frames
- 
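- # Typical call, as a sketch; frozen_steps > 0 presumably keeps the masked
- # foreground latents fixed for the first few denoising steps (inferred from
- # the parameter names):
- #   frames = generate_video(pipe, "a cat on a windowsill", latents,
- #                           fg_masks=[mask], fg_prompt=["cat"], frozen_steps=2)
- 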
- def save_frames(path):
-     # read_video returns the frames as a uint8 tensor of shape (T, H, W, C)
-     video, _, _ = vision_io.read_video(f"{path}.mp4", pts_unit='sec')
-     num_frames = video.size(0)
- 
-     # Save each frame; frames are already H x W x C, so no permute is needed
-     os.makedirs(f"{path}", exist_ok=True)
-     for i in range(num_frames):
-         frame = video[i].numpy()
-         img = Image.fromarray(frame.astype('uint8'))
-         img.save(f"{path}/frame_{i:04d}.png")
- 
- if __name__ == "__main__":
-     # Example usage
-     num_frames = 24
-     save_path = "video"
-     torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-     random_latents = torch.randn([1, 4, num_frames, 40, 72], generator=torch.Generator().manual_seed(2)).to(torch_device)
- 
-     try:
-         pipe = TextToVideoSDPipelineSpatialAware.from_pretrained(
-             "cerspense/zeroscope_v2_576w", torch_dtype=torch.float, variant="fp32").to(torch_device)
-     except Exception:
-         # Retry once: hub downloads can fail transiently.
-         pipe = TextToVideoSDPipelineSpatialAware.from_pretrained(
-             "cerspense/zeroscope_v2_576w", torch_dtype=torch.float, variant="fp32").to(torch_device)
- 
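-     # The 40x72 mask/latent grid matches the 320x576 output at the VAE's 8x
-     # spatial downsampling (320 / 8 = 40, 576 / 8 = 72).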
-     # Build one latent-resolution box mask per frame for each foreground object.
-     # Here x_* indexes the height axis (40) and y_* the width axis (72).
-     bbox_mask = torch.zeros([num_frames, 1, 40, 72], device=torch_device)
-     bbox_mask_2 = torch.zeros([num_frames, 1, 40, 72], device=torch_device)
- 
-     x_start = [10 + (i % 3) for i in range(num_frames)]  # slight vertical movement over time
-     x_end = [30 + (i % 3) for i in range(num_frames)]
-     y_start = [10 for _ in range(num_frames)]  # first box sits in the left half
-     y_end = [25 for _ in range(num_frames)]
- 
-     # Set the masks to one inside each box; the second box mirrors the first horizontally.
-     for i in range(num_frames):
-         bbox_mask[i, :, x_start[i]:x_end[i], y_start[i]:y_end[i]] = 1
-         bbox_mask_2[i, :, x_start[i]:x_end[i], 72 - y_end[i]:72 - y_start[i]] = 1
- 
-     fg_masks = [bbox_mask, bbox_mask_2]
- 
-     frozen_prompt = None
-     fg_masked_latents = None
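-     # Each entry pairs the foreground phrases (presumably one per mask, in
-     # order) with the overall scene prompt.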
-     prompts = [
-         (["cat", "goldfish bowl"], "A cat curiously staring at a goldfish bowl on a sunny windowsill."),
-         (["Superman", "Batman"], "Superman and Batman standing side by side in a heroic pose against a city skyline."),
-         (["rose", "daisy"], "A rose and a daisy in a small vase on a rustic wooden table."),
-         (["Harry Potter", "Hermione Granger"], "Harry Potter and Hermione Granger studying a magical map."),
-         (["butterfly", "dragonfly"], "A butterfly and a dragonfly resting on a leaf in a vibrant garden."),
-         (["teddy bear", "toy train"], "A teddy bear and a toy train on a child's playmat in a brightly lit room."),
-         (["frog", "turtle"], "A frog and a turtle sitting on a lily pad in a serene pond."),
-         (["Mickey Mouse", "Donald Duck"], "Mickey Mouse and Donald Duck enjoying a day at the beach, building a sandcastle."),
-         (["penguin", "seal"], "A penguin and a seal lounging on an iceberg in the Antarctic."),
-         (["lion", "zebra"], "A lion and a zebra peacefully drinking water from the same pond in the savannah.")
-     ]
- 
-     for fg_object, overall_prompt in prompts:
-         # Dump the per-frame masks next to the videos for inspection.
-         os.makedirs(f"{OUTPUT_PATH}/{save_path}/{overall_prompt}-mask", exist_ok=True)
-         try:
-             for i in range(num_frames):
-                 torchvision.utils.save_image(fg_masks[0][i, 0], f"{OUTPUT_PATH}/{save_path}/{overall_prompt}-mask/frame_{i:04d}_0.png")
-                 torchvision.utils.save_image(fg_masks[1][i, 0], f"{OUTPUT_PATH}/{save_path}/{overall_prompt}-mask/frame_{i:04d}_1.png")
-         except Exception:
-             pass  # the mask dumps are only for debugging; ignore failures
-         print(fg_object, overall_prompt)
-         seed = 2
-         random_latents = torch.randn([1, 4, num_frames, 40, 72], generator=torch.Generator().manual_seed(seed)).to(torch_device)
-         for num_inference_steps in range(40, 50, 10):  # yields only 40 with this step size
-             for frozen_steps in [0, 1, 2]:
-                 video_frames = generate_video(pipe, overall_prompt, random_latents, get_latents=False, num_frames=num_frames, num_inference_steps=num_inference_steps,
-                                               fg_masks=fg_masks, fg_masked_latents=fg_masked_latents, frozen_steps=frozen_steps, frozen_prompt=frozen_prompt, fg_prompt=fg_object)
-                 # Save the video and its individual frames
-                 os.makedirs(f"{OUTPUT_PATH}/{save_path}/{overall_prompt}", exist_ok=True)
-                 export_to_video(video_frames, f"{OUTPUT_PATH}/{save_path}/{overall_prompt}/{frozen_steps}_of_{num_inference_steps}_{seed}_masked.mp4")
-                 save_frames(f"{OUTPUT_PATH}/{save_path}/{overall_prompt}/{frozen_steps}_of_{num_inference_steps}_{seed}_masked")