File size: 2,284 Bytes
d323598 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
"""Command line interface for generating videos from the model."""
from __future__ import annotations
import argparse
import queue
import threading
import rerun as rr
import vista
def generate_local(
    first_frame_file_name: str,
    height: int = 576,
    width: int = 1024,
    n_rounds: int = 4,
    n_frames: int = 25,
    n_steps: int = 10,
    cfg_scale: float = 2.5,
    cond_aug: float = 0.0,
) -> None:
    """Run Vista sampling in a background thread and log its output to Rerun.

    ``vista.run_sampling`` is started on a worker thread and handed a queue.
    This function drains that queue on the calling thread: every message is
    either the string ``"done"`` (stop) or a 3-tuple
    ``(entity_path, entity, times)`` where ``times`` is an iterable of
    ``(timeline, time)`` pairs. Integer times are set as sequence timelines,
    everything else as seconds, and the entity is then logged.

    Args:
        first_frame_file_name: Path of the image passed to
            ``vista.run_sampling`` (presumably the conditioning first frame).
        height: Forwarded unchanged to ``vista.run_sampling``.
        width: Forwarded unchanged to ``vista.run_sampling``.
        n_rounds: Forwarded unchanged; the CLI maps ``--num-segments`` here.
        n_frames: Forwarded unchanged (presumably frames per segment).
        n_steps: Forwarded unchanged; the CLI maps ``--num-steps`` here.
        cfg_scale: Forwarded unchanged to ``vista.run_sampling``.
        cond_aug: Forwarded unchanged to ``vista.run_sampling``.

    Raises:
        Exception: Whatever ``vista.run_sampling`` raised on the worker
            thread, re-raised here once the worker has finished.
    """
    # Use a queue to log immediately from internals
    log_queue: queue.SimpleQueue = queue.SimpleQueue()
    worker_errors: list[BaseException] = []

    def _run_sampling() -> None:
        """Run the sampler and always signal completion to the consumer."""
        try:
            vista.run_sampling(
                log_queue,
                first_frame_file_name,
                height,
                width,
                n_rounds,
                n_frames,
                n_steps,
                cfg_scale,
                cond_aug,
            )
        except Exception as exc:
            # Keep the failure so the caller sees it instead of a stderr-only
            # traceback from the thread excepthook.
            worker_errors.append(exc)
        finally:
            # Without this, a crash in the sampler would leave the consumer
            # loop blocked on `get()` forever. An extra "done" after a normal
            # run is harmless: the loop has already exited by then.
            log_queue.put("done")

    handle = threading.Thread(target=_run_sampling)
    handle.start()

    while True:
        msg = log_queue.get()
        if msg == "done":
            break

        entity_path, entity, times = msg
        rr.reset_time()
        for timeline, time in times:
            if isinstance(time, int):
                rr.set_time_sequence(timeline, time)
            else:
                rr.set_time_seconds(timeline, time)
        rr.log(entity_path, entity)

    handle.join()

    if worker_errors:
        raise worker_errors[0]
# Script entry point: parse CLI arguments, set up Rerun, then generate and log
# the video.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate video conditioned on a single image using the Vista model."
    )
    parser.add_argument(
        "--img-path",
        type=str,
        # The previous help text mentioned a Canny edge detector, which this
        # script never uses; the image is what generation is conditioned on.
        help="Path to the image used as the first frame to condition the generated video on.",
        default="./example_images/nus-0.jpg",
    )
    parser.add_argument(
        "--num-steps",
        type=int,
        help="Number of diffusion steps per image. Recommended range: 10-50. Higher values result in more detailed images and less blurry results.",
        default=20,
    )
    parser.add_argument(
        "--num-segments",
        type=int,
        help="Number of segments to generate. Each segment consists of 25 frames.",
        default=3,
    )
    # Adds Rerun's standard CLI flags to the parser.
    rr.script_add_args(parser)
    args = parser.parse_args()

    rr.script_setup(
        args,
        "rerun_example_vista",
        default_blueprint=vista.generate_blueprint(args.num_segments),
    )

    generate_local(args.img_path, n_steps=args.num_steps, n_rounds=args.num_segments)

    # Counterpart of `script_setup`; runs Rerun's common post-actions (for
    # example keeping the process alive when serving the web viewer).
    rr.script_teardown(args)
|