Spaces:

fffiloni
/

Go-With-The-Flow

Runtime error

App Files Files Community

Go-With-The-Flow / make_warped_noise.py

fffiloni

Migrated from GitHub

4d5ccae verified 14 days ago

raw

history blame contribute delete

4.7 kB

	#Ryan Burgert 2024

	#Setup:
	# Run this in a Jupyter Notebook on a computer with at least one GPU
	# `sudo apt install ffmpeg git`
	# `pip install rp`
	# The first time you run this it might be a bit slow (it will download necessary models)
	# The `rp` package will take care of installing the rest of the python packages for you

	import rp

	# Let rp install missing packages without prompting, so the pip_import / git_import
	# calls below can run unattended.
	rp.r._pip_import_autoyes=True #Automatically install missing packages

	# NOTE(review): the two rp helper calls are deliberately placed before the plain
	# `import` statements that follow them - presumably so that `fire` and
	# `rp.git.CommonSource` are available by the time they are imported. Keep this order.
	rp.pip_import('fire')
	rp.git_import('CommonSource') #If missing, installs code from https://github.com/RyannDaGreat/CommonSource
	import rp.git.CommonSource.noise_warp as nw
	import fire

	def main(
	    video: str,
	    output_folder: str,
	    *,
	    num_frames: int = 49,
	    height: int = 480,
	    width: int = 720,
	    noise_channels: int = 16,
	):
	    """
	    Compute warped noise for a video and save it, with previews, to a new folder.

	    Takes a video URL or filepath and an output folder path.
	    The video is resized to `num_frames` frames of `height` x `width` pixels
	    (the defaults are CogVideoX's dimensions: 49 frames of 480x720).
	    Warped noise is then calculated at latent resolution (i.e. 1/8 of the
	    width and height) with `noise_channels` channels.
	    The warped noise, optical flows, and related preview videos and images are
	    saved to the output folder, together with first_frame.png and input.mp4.
	    The main file you need is <output_folder>/noises.npy which is the gaussian
	    noises in (H,W,C) form.

	    Args:
	        video: URL or file path of the input video.
	        output_folder: Folder to write the results into. It must not exist yet.
	        num_frames: Number of frames the video is stretched or squashed to.
	        height: Frame height in pixels after resizing and center-cropping.
	        width: Frame width in pixels after resizing and center-cropping.
	        noise_channels: Number of channels of the generated noise.

	    Raises:
	        RuntimeError: If output_folder already exists.
	    """

	    # Refuse to write into an existing folder so earlier results are never clobbered.
	    if rp.folder_exists(output_folder):
	        raise RuntimeError(f"The given output_folder={repr(output_folder)} already exists! To avoid clobbering what might be in there, please specify a folder that doesn't exist so I can create one for you. Alternatively, you could delete that folder if you don't care whats in it.")

	    # FRAME: the input frames are immediately resized by this factor, before calculating optical flow.
	    # The flow is calculated at (input size) × FRAME resolution.
	    # Higher FRAME values result in slower optical flow calculation and higher intermediate noise resolution.
	    # Larger is not always better - watch the preview in Jupyter to see if it looks good!
	    FRAME = 2**-1

	    # FLOW: bilinear interpolation then upscales the flow by this factor.
	    # The noise is warped at (input size) × FRAME × FLOW resolution and then downsampled back to (input size).
	    # Higher FLOW values result in more temporally consistent noise warping,
	    # at the cost of higher VRAM usage and slower inference time.
	    FLOW = 2**3

	    # LATENT: the outputs are further downsampled by this amount, because 8 pixels wide
	    # corresponds to one latent wide in Stable Diffusion.
	    # The final output size is (input size) ÷ LATENT regardless of FRAME and FLOW.
	    # Use LATENT = 1 for a prettier visualization; for latent diffusion models, use LATENT = 8.
	    LATENT = 8

	    # A string is treated as a path or URL and loaded; anything else is used as already-loaded frames.
	    if isinstance(video, str):
	        video = rp.load_video(video)

	    # Preprocess the video: fix the frame count, then resize and center-crop to exactly height x width.
	    video = rp.resize_list(video, length=num_frames)
	    video = rp.resize_images_to_hold(video, height=height, width=width)
	    video = rp.crop_images(video, height=height, width=width, origin='center')
	    video = rp.as_numpy_array(video)

	    # See nw.get_noise_from_video's docstring for more information!
	    output = nw.get_noise_from_video(
	        video,
	        remove_background=False,  # Set this to True to matte the foreground - and force the background to have no flow
	        visualize=True,  # Generates nice visualization videos and previews in Jupyter notebook
	        save_files=True,  # Set this to False if you just want the noises without saving to a numpy file
	        noise_channels=noise_channels,
	        output_folder=output_folder,
	        resize_frames=FRAME,
	        resize_flow=FLOW,
	        # Undo the FRAME × FLOW upscaling, then shrink by LATENT to reach latent resolution.
	        downscale_factor=round(FRAME * FLOW) * LATENT,
	    )

	    # Also keep the preprocessed input next to the noise: its first frame and the whole clip.
	    output.first_frame_path = rp.save_image(video[0], rp.path_join(output_folder, 'first_frame.png'))
	    rp.save_video_mp4(video, rp.path_join(output_folder, 'input.mp4'), framerate=12, video_bitrate='max')

	    print("Noise shape:", output.numpy_noises.shape)
	    print("Flow shape:", output.numpy_flows.shape)
	    print("Output folder:", output.output_folder)

	# Script entry point: hand main() to Python Fire so its parameters become the command-line arguments.
	if __name__ == '__main__':
	    fire.Fire(main)