# Spaces: Runtime error
import os | |
import cv2 | |
import numpy as np | |
from gtts import gTTS | |
from mutagen.mp3 import MP3 | |
import nltk | |
import ffmpeg | |
nltk.download('punkt') | |
from nltk.tokenize import sent_tokenize | |
from math import ceil | |
from segmindapi import SD2_1, Kadinsky | |
import gradio as gr | |
class Videobook:
    """Turn a text story into a narrated slideshow video.

    One image is generated per sentence and held on screen for as long as the
    text-to-speech reading of that sentence lasts; the resulting frames are
    then combined with a narration of the whole story.

    ``generate`` is the entry point. It stores ``fps``, ``style``, ``tags``
    and ``pipe`` on the instance, and the other methods read them from there.
    """

    def get_sentences(self, story):
        """Split ``story`` into a list of sentences with NLTK's tokenizer."""
        return sent_tokenize(story)

    def generate_voice(self, story, sentences, path='tts.mp3'):
        """Synthesize the narration and time every sentence.

        Each sentence is rendered to a temporary ``tts<i>.mp3`` in the current
        working directory solely to read its duration; the temporary file is
        removed again even if synthesis or measurement fails. Afterwards the
        complete ``story`` is synthesized in one piece and saved to ``path``.

        Args:
            story: Full text to narrate.
            sentences: The sentences of ``story``, in reading order.
            path: Destination of the full narration MP3.

        Returns:
            A list with one duration per sentence, as reported by mutagen's
            ``MP3(...).info.length`` (seconds).
        """
        lgth = []
        for i, sentence in enumerate(sentences):
            clip_path = os.path.join(os.getcwd(), 'tts' + str(i) + '.mp3')
            try:
                gTTS(sentence, lang='en').save(clip_path)
                lgth.append(MP3(clip_path).info.length)
            finally:
                # The clip only exists to be measured; never leave it behind.
                try:
                    os.remove(clip_path)
                except FileNotFoundError:
                    pass
        gTTS(story, lang='en').save(path)
        return lgth

    def generate_imgs(self, sentences, steps):
        """Generate one image per sentence through ``self.pipe``.

        The prompt for a sentence is ``"<style> of <sentence>, <tags>"``.

        Args:
            sentences: Sentences to illustrate.
            steps: Forwarded to the pipe as ``num_inference_steps``.

        Returns:
            The list of objects returned by ``self.pipe.generate``, in
            sentence order.
        """
        return [
            self.pipe.generate(
                f"{self.style} of {sentence}, {self.tags}",
                num_inference_steps=steps,
            )
            for sentence in sentences
        ]

    def addBuffer(self, imgs, lgth):
        """Repeat every image so it fills its sentence's share of frames.

        Image ``i`` is repeated ``ceil(lgth[i] * self.fps)`` times; rounding
        up guarantees a picture never ends before its narration does.

        Args:
            imgs: Images, one per sentence.
            lgth: Duration of each sentence, indexed like ``imgs``.

        Returns:
            A flat list of frames (the same image object repeated).
        """
        imgs_buff = []
        for i, img in enumerate(imgs):
            imgs_buff.extend([img] * ceil(lgth[i] * self.fps))
        return imgs_buff

    def imgs_to_video(self, imgs, video_name='video.mp4', fourcc='mp4v'):
        """Write ``imgs`` as consecutive frames of a silent video file.

        Args:
            imgs: Non-empty sequence of frames. Each frame must expose
                ``width``/``height`` and be convertible with ``np.array`` to
                an RGB array (presumably PIL images -- confirm against the
                image pipe in use).
            video_name: Output file path.
            fourcc: Four-character codec tag handed to OpenCV. ``'mp4v'`` is
                used by default because it fits the ``.mp4`` container.

        Raises:
            ValueError: If ``imgs`` is empty.
            RuntimeError: If OpenCV cannot open ``video_name`` for writing.
        """
        if not imgs:
            raise ValueError("imgs_to_video needs at least one image")
        video_dims = (imgs[0].width, imgs[0].height)
        video = cv2.VideoWriter(
            video_name, cv2.VideoWriter_fourcc(*fourcc), self.fps, video_dims
        )
        if not video.isOpened():
            raise RuntimeError("could not open video writer for " + str(video_name))
        try:
            previous_img = None
            frame = None
            for img in imgs:
                # Buffered input repeats the same image object many times in a
                # row; convert RGB -> BGR once per distinct image.
                if img is not previous_img:
                    frame = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                    previous_img = img
                video.write(frame)
        finally:
            # Always finalize the container, even when a frame fails to write.
            video.release()

    def make_video(self, imgs, lengths, video_name="finished_video.mp4",
                   audio_path='tts.mp3'):
        """Render the slideshow and combine it with the narration track.

        The silent slideshow is first written to ``test_video.mp4`` in the
        current working directory and then combined with ``audio_path`` via
        ffmpeg into ``video_name`` (overwriting an existing file).

        Args:
            imgs: Images, one per sentence.
            lengths: Duration of each sentence, indexed like ``imgs``.
            video_name: Path of the final video.
            audio_path: Narration file; relative paths are resolved against
                the current working directory.
        """
        self.imgs_to_video(self.addBuffer(imgs, lengths), 'test_video.mp4')
        input_audio = ffmpeg.input(os.path.join(os.getcwd(), audio_path))
        input_video = ffmpeg.input(os.path.join(os.getcwd(), 'test_video.mp4'))
        (
            ffmpeg
            .concat(input_video, input_audio, v=1, a=1)
            .output(video_name)
            .run(overwrite_output=True)
        )

    def generate(self, story, api_key, fps, style, tags, model, steps):
        """Create the narrated video for ``story`` and return its path.

        Args:
            story: Text to narrate and illustrate.
            api_key: Key passed to the image-generation client.
            fps: Frame rate of the produced video.
            style: Style phrase placed at the start of every image prompt.
            tags: Extra prompt text appended to every image prompt.
            model: ``"Stable Diffusion v2.1"`` selects ``SD2_1``; any other
                value selects ``Kadinsky``.
            steps: Number of inference steps per image.

        Returns:
            ``"finished_video.mp4"``, the path of the written video.

        Raises:
            ValueError: If ``story`` is empty or only whitespace.
        """
        # Fail early: without text there are no sentences, images or frames,
        # and the later stages would only fail with an obscure error.
        if not story or not story.strip():
            raise ValueError("story must contain at least one sentence")

        self.fps = fps
        self.style = style
        self.tags = tags
        if model == "Stable Diffusion v2.1":
            self.pipe = SD2_1(api_key)
        else:
            self.pipe = Kadinsky(api_key)

        sentences = self.get_sentences(story)
        lengths = self.generate_voice(story, sentences)
        images = self.generate_imgs(sentences, steps)

        video_name = "finished_video.mp4"
        self.make_video(images, lengths, video_name)
        return video_name