import os

import torch
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image

print("Starting...")

# Get the prompt from an environment variable, or fall back to a default value
prompt = os.getenv(
    "PROMPT_TEXT",
    "A little girl is riding a bicycle at high speed. Focused, detailed, realistic.",
)

print("Loading model...")
image = load_image(image="input.jpg")
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    "THUDM/CogVideoX-5b-I2V",
    torch_dtype=torch.bfloat16,
)

# Reduce GPU memory usage: offload submodules to CPU sequentially and run the VAE in tiles/slices
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()

print("Generating...")
video = pipe(
    prompt=prompt,
    image=image,
    num_videos_per_prompt=1,
    num_inference_steps=50,
    num_frames=49,
    guidance_scale=6,
    generator=torch.Generator(device="cuda").manual_seed(42),
).frames[0]

print("Saving video...")
export_to_video(video, "output.mp4", fps=8)
print("Done!")
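
Since the prompt is read via os.getenv("PROMPT_TEXT", ...), it can be overridden without editing the script. Below is a minimal sketch of driving that override from Python; the filename generate.py and the replacement prompt are assumptions, not part of the original.

import os
import subprocess

# Hypothetical filename: adjust to wherever the script above is saved.
env = dict(os.environ, PROMPT_TEXT="A red bicycle rolling through autumn leaves.")
subprocess.run(["python", "generate.py"], env=env, check=True)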