---
license: mit
---

```bash
git clone https://huggingface.co/ProgramerSalar/L1-S
cd L1-S
pip install -r requirements.txt
```

- Installing the packages listed in `requirements.txt` takes approximately 15 minutes.

```python
import os
import json
import torch
import numpy as np
import PIL
from PIL import Image
from IPython.display import HTML
from pyramid_dit import PyramidDiTForVideoGeneration
from IPython.display import Image as ipython_image
from diffusers.utils import load_image, export_to_video, export_to_gif

variant='diffusion_transformer_768p'       # For high resolution
# variant='diffusion_transformer_384p'     # For low resolution

model_path = "Path"   # The downloaded checkpoint dir
model_dtype = 'bf16'

device_id = 0
torch.cuda.set_device(device_id)

model = PyramidDiTForVideoGeneration(
    model_path,
    model_dtype,
    model_variant=variant,
)

model.vae.to("cuda")
model.dit.to("cuda")
model.text_encoder.to("cuda")
model.vae.enable_tiling()

if model_dtype == "bf16":
    torch_dtype = torch.bfloat16
elif model_dtype == "fp16":
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32

prompt = "A movie trailer featuring the adventures of the 30 year old space man wearing a red wool knitted motorcycle helmet, blue sky, salt desert, cinematic style, shot on 35mm film, vivid colors"

# used for 384p model variant
# width = 640
# height = 384

# used for 768p model variant
width = 1280
height = 768

temp = 16   # temp in [1, 31] <=> frame in [1, 241] <=> duration in [0, 10s]

with torch.no_grad(), torch.cuda.amp.autocast(enabled=True if model_dtype != 'fp32' else False, dtype=torch_dtype):
    frames = model.generate(
        prompt=prompt,
        num_inference_steps=[20, 20, 20],
        video_num_inference_steps=[10, 10, 10],
        height=height,
        width=width,
        temp=temp,
        guidance_scale=9.0,         # The guidance for the first frame, set it to 7 for 384p variant
        video_guidance_scale=5.0,   # The guidance for the other video latent
        output_type="pil",
        save_memory=True,           # If you have enough GPU memory, set it to `False` to improve vae decoding speed
    )

export_to_video(frames, "./text_to_video_sample.mp4", fps=24)
```

- Generating the video takes roughly 10 minutes.