|
import numpy as np |
|
import torch |
|
from tqdm import tqdm |
|
import cv2 |
|
|
|
def ssim(img1, img2, data_range=1.0):
    """Compute the mean SSIM between two 2-D images.

    Uses an 11x11 Gaussian window (sigma=1.5) and crops a 5-pixel border
    where the filter response is contaminated by padding, matching the
    reference SSIM implementation.

    Args:
        img1, img2: 2-D arrays of the same shape.
        data_range: Dynamic range of the pixel values — 1.0 (default) for
            images scaled to [0, 1], 255 for 8-bit images. The default
            preserves the original behavior.

    Returns:
        Mean SSIM over the valid (border-cropped) region as a float.
    """
    # Stabilizing constants from the SSIM paper: C1=(K1*L)^2, C2=(K2*L)^2
    # with K1=0.01, K2=0.03 and L the dynamic range.
    C1 = (0.01 * data_range) ** 2
    C2 = (0.03 * data_range) ** 2
    img1 = img1.astype(np.float64)
    img2 = img2.astype(np.float64)
    kernel = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(kernel, kernel.transpose())
    # Local means over the Gaussian window.
    mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]
    mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
    mu1_sq = mu1 ** 2
    mu2_sq = mu2 ** 2
    mu1_mu2 = mu1 * mu2
    # Local variances and covariance via E[x^2] - E[x]^2.
    sigma1_sq = cv2.filter2D(img1 ** 2, -1, window)[5:-5, 5:-5] - mu1_sq
    sigma2_sq = cv2.filter2D(img2 ** 2, -1, window)[5:-5, 5:-5] - mu2_sq
    sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
        (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
    return ssim_map.mean()
|
|
|
|
|
def calculate_ssim_function(img1, img2):
    """SSIM for one image pair given as 2-D (H, W) or 3-D (C, H, W) arrays.

    For 3-D input the SSIM is computed per channel and averaged, for any
    channel count (generalizes the original 1- or 3-channel-only handling;
    results are identical for those cases).

    Raises:
        ValueError: if the shapes differ or ndim is not 2 or 3.
    """
    if img1.shape != img2.shape:
        raise ValueError('Input images must have the same dimensions.')
    if img1.ndim == 2:
        return ssim(img1, img2)
    if img1.ndim == 3:
        # Average over channels; works for any number of channels, and the
        # original code silently returned None for ndim not in {2, 3} —
        # now an explicit error below.
        return np.mean([ssim(img1[c], img2[c]) for c in range(img1.shape[0])])
    raise ValueError('Wrong input image dimensions.')
|
|
|
def trans(x):
    """Identity hook for a pre-metric transform (e.g. rescaling to [0, 1]).

    Currently a no-op: returns *x* unchanged.
    """
    return x
|
|
|
def calculate_ssim(videos1, videos2):
    """Per-timestamp SSIM statistics between two batches of videos.

    Args:
        videos1, videos2: tensors of shape
            (batch, time, channel, height, width) with matching shapes.
            Frames are converted via ``.numpy()``, so they must live on CPU.

    Returns:
        dict with:
            "value": {t: mean SSIM over the batch at timestamp t}
            "value_std": {t: std of SSIM over the batch at timestamp t}
            "video_setting": shape of one video (time, channel, height, width)
            "video_setting_name": axis-order label for "video_setting"
    """
    print("calculate_ssim...")

    assert videos1.shape == videos2.shape

    videos1 = trans(videos1)
    videos2 = trans(videos2)

    # ssim_results[b][t] = SSIM of video b at timestamp t.
    ssim_results = []
    for video_num in tqdm(range(videos1.shape[0])):
        video1 = videos1[video_num]
        video2 = videos2[video_num]

        per_frame = []
        for clip_timestamp in range(len(video1)):
            img1 = video1[clip_timestamp].numpy()
            img2 = video2[clip_timestamp].numpy()
            per_frame.append(calculate_ssim_function(img1, img2))

        ssim_results.append(per_frame)

    ssim_results = np.array(ssim_results)

    # Plain local dicts — the original bound a local dict named `ssim`,
    # shadowing the module-level ssim() function inside this scope.
    ssim_mean = {}
    ssim_std = {}

    # Iterate over the known time dimension instead of relying on the
    # `video1` variable leaked from the batch loop above (which would raise
    # NameError for an empty batch).
    for clip_timestamp in range(videos1.shape[1]):
        ssim_mean[clip_timestamp] = np.mean(ssim_results[:, clip_timestamp])
        ssim_std[clip_timestamp] = np.std(ssim_results[:, clip_timestamp])

    return {
        "value": ssim_mean,
        "value_std": ssim_std,
        # Shape of a single video: (time, channel, height, width).
        "video_setting": videos1.shape[1:],
        "video_setting_name": "time, channel, height, width",  # typo "heigth" fixed
    }
|
|
|
|
|
|
|
def main():
    """Smoke test: SSIM between two identical all-zero video batches.

    Every frame pair is identical, so the per-timestamp mean SSIM should be
    1.0 with zero standard deviation.
    """
    import json

    NUMBER_OF_VIDEOS = 8
    VIDEO_LENGTH = 50
    CHANNEL = 3
    SIZE = 64
    # CPU tensors: calculate_ssim calls .numpy(), which requires CPU data.
    # (The original also created an unused torch.device("cuda") — removed.)
    videos1 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False)
    videos2 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False)

    result = calculate_ssim(videos1, videos2)
    print(json.dumps(result, indent=4))
|
|
|
if __name__ == "__main__": |
|
main() |