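"""Gradio demo: image-to-video generation with the Lightricks LTX-Video pipeline from diffusers."""
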
import gradio as gr
import torch
import os
import random
from glob import glob
from typing import Optional
from diffusers import LTXImageToVideoPipeline
from diffusers.utils import export_to_video
from PIL import Image
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load in bfloat16 to roughly halve memory use versus float32.
pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
pipe.to(device)
torch.cuda.empty_cache()
max_64_bit_int = 2**63 - 1  # largest signed 64-bit integer; upper bound for the seed slider
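
# Generate a video from an input image and text prompt. Returns the saved MP4
# path and the seed actually used, so the UI can display a randomized seed.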
def sample(
    image: Image.Image,
    prompt: str,
    negative_prompt: str,
    seed: Optional[int] = 42,
    randomize_seed: bool = True,
    num_inference_steps: int = 25,
    num_frames: int = 60,
    fps_id: int = 12,
    decode_noise_scale: float = 0.3,
    max_sequence_length: int = 512,
    output_folder: str = "outputs",
):
    if image.mode == "RGBA":
        image = image.convert("RGB")
    if randomize_seed:
        seed = random.randint(0, max_64_bit_int)
    torch.cuda.empty_cache()
    # Seed a generator and pass it to the pipeline so results are reproducible.
    generator = torch.manual_seed(seed)
    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
    video = pipe(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=768,
        height=512,
        num_frames=num_frames,
        num_inference_steps=num_inference_steps,
        guidance_scale=3.5,
        decode_noise_scale=decode_noise_scale,
        max_sequence_length=max_sequence_length,
        generator=generator,
    ).frames[0]
    export_to_video(video, video_path, fps=fps_id)
    torch.cuda.empty_cache()
    return video_path, seed
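
# Fit uploads to the generation resolution: scale so the image fully covers
# output_size, then center-crop the overflow.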
def resize_image(image, output_size=(768, 512)):
    target_aspect = output_size[0] / output_size[1]  # aspect ratio of the desired size
    image_aspect = image.width / image.height  # aspect ratio of the original image
    if image_aspect > target_aspect:
        # Wider than the target: match the height, then crop excess width.
        new_height = output_size[1]
        new_width = int(new_height * image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        left = (new_width - output_size[0]) // 2
        top = 0
    else:
        # Taller than the target: match the width, then crop excess height.
        new_width = output_size[0]
        new_height = int(new_width / image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        left = 0
        top = (new_height - output_size[1]) // 2
    # Integer crop box centered on the resized image.
    right = left + output_size[0]
    bottom = top + output_size[1]
    return resized_image.crop((left, top, right, bottom))
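
# Gradio UI: image input and generate button in one column, video output beside
# them, with advanced options that map onto sample()'s arguments.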
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Upload your image", type="pil")
            generate_btn = gr.Button("Generate")
        video = gr.Video()
    with gr.Accordion("Advanced options", open=False):
        seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        prompt = gr.Textbox(label="Prompt")
        negative_prompt = gr.Textbox(label="Negative Prompt", value="Blur, Motion Blur, Artifacts, Motion Artifacts, Poor Quality, Low Resolution, Smudging, Streaks, Motion Streaks, Pixels, Pixelated, Ugly, Mutation, Mutated")
        num_inference_steps = gr.Slider(label="Number of Inference Steps", value=25, minimum=25, maximum=100, step=25)
        num_frames = gr.Slider(label="Number of Frames", value=60, minimum=30, maximum=160)
        fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be frames/fps", value=12, minimum=5, maximum=30, step=1)

    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
    generate_btn.click(
        fn=sample,
        inputs=[image, prompt, negative_prompt, seed, randomize_seed, num_inference_steps, num_frames, fps_id],
        outputs=[video, seed],
        api_name="video",
    )
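
# Queue incoming requests (at most 20 waiting) rather than running
# generations concurrently on the same device.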
if __name__ == "__main__":
    demo.queue(max_size=20, api_open=False)
    demo.launch(show_api=False)