# Videobook_v1.5 / Videobook / Videobook.py
# NOTE(review): the lines below are Hugging Face file-viewer chrome that was
# captured along with the source ("Warlord-K's picture", "Duplicate from
# Warlord-K/Videobook", commit 99b9405, "raw / history blame / 2.42 kB").
# Kept here as a comment so the file is valid Python.
import os
import cv2
import numpy as np
from gtts import gTTS
from mutagen.mp3 import MP3
import nltk
import ffmpeg
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
from math import ceil
from segmindapi import SD2_1, Kadinsky
import gradio as gr
class Videobook:
  """Turn a story string into a narrated slideshow video.

  Pipeline: split the story into sentences, synthesize narration with gTTS,
  generate one image per sentence via a Segmind diffusion API, hold each
  image on screen for its sentence's spoken duration, and mux frames + audio
  with ffmpeg.  Entry point is `generate()`, which sets the per-run state
  (`fps`, `style`, `tags`, `pipe`) that the other methods read.
  """

  def get_sentences(self, story):
    """Split `story` into sentences using NLTK's Punkt tokenizer."""
    return sent_tokenize(story)

  def generate_voice(self, story, sentences, path = 'tts.mp3'):
    """Synthesize narration audio and measure per-sentence durations.

    Saves the full-story narration to `path` and returns a list of the
    spoken duration (seconds) of each sentence, used later to decide how
    long each image stays on screen.
    """
    lgth = []
    # Each sentence is rendered to its own temp mp3 solely to measure its
    # duration; the temp file is deleted as soon as it has been measured
    # (the original wrote all files first, then measured and deleted —
    # one pass keeps at most one temp file on disk).
    for i, sentence in enumerate(sentences):
      tmp_path = 'tts' + str(i) + '.mp3'
      gTTS(sentence, lang='en').save(tmp_path)
      lgth.append(MP3(tmp_path).info.length)
      os.remove(tmp_path)
    # The whole story is synthesized once more as the actual soundtrack.
    gTTS(story, lang='en').save(path)
    return lgth

  def generate_imgs(self, sentences, steps):
    """Generate one image per sentence from the configured diffusion pipe.

    Each sentence is wrapped into a prompt as '<style> of <sentence>, <tags>'.
    Returns a list of images (whatever `self.pipe.generate` yields —
    presumably PIL images, since `imgs_to_video` reads `.width`/`.height`).
    """
    prompts = [self.style + ' of ' + s + ', ' + self.tags for s in sentences]
    return [self.pipe.generate(p, num_inference_steps = steps) for p in prompts]

  def addBuffer(self, imgs, lgth):
    """Repeat each image for ceil(duration * fps) frames.

    This makes the silent video track line up with the narration: image i
    stays on screen for at least lgth[i] seconds at self.fps frames/sec.
    """
    frames = []
    for img, seconds in zip(imgs, lgth):
      frames.extend([img] * ceil(seconds * self.fps))
    return frames

  def imgs_to_video(self, imgs, video_name='video.mp4'):
    """Write a list of (PIL) images to an mp4 file via OpenCV."""
    video_dims = (imgs[0].width, imgs[0].height)
    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    video = cv2.VideoWriter(video_name, fourcc, self.fps, video_dims)
    try:
      for img in imgs:
        # PIL images are RGB; OpenCV expects BGR.  (The per-frame
        # img.copy() in the original was redundant — np.array already
        # produces a fresh buffer.)
        video.write(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
    finally:
      # Fix: always release the writer, even if a frame conversion raises,
      # so the output file is finalized and the handle is not leaked.
      video.release()

  def make_video(self, imgs, lengths, video_name = "finished_video.mp4"):
    """Build the silent frame video, then mux it with the narration track."""
    temp_video = 'test_video.mp4'
    self.imgs_to_video(self.addBuffer(imgs, lengths), temp_video)
    input_audio = ffmpeg.input('tts.mp3')
    input_video = ffmpeg.input(temp_video)
    ffmpeg.concat(input_video, input_audio, v=1, a=1).output(video_name).run(overwrite_output=True)
    # Fix: the intermediate silent video was previously left behind on disk.
    os.remove(temp_video)

  def generate(self, story, api_key, fps, style, tags, model, steps):
    """Run the full story-to-video pipeline; returns the output filename.

    Params: story text, Segmind API key, output fps, prompt style prefix,
    prompt tag suffix, model name ("Stable Diffusion v2.1" selects SD2.1,
    anything else selects Kadinsky), and diffusion step count.
    """
    self.fps = fps
    self.style = style
    self.tags = tags
    if model == "Stable Diffusion v2.1":
      self.pipe = SD2_1(api_key)
    else:
      self.pipe = Kadinsky(api_key)
    sentences = self.get_sentences(story)
    lengths = self.generate_voice(story, sentences)
    images = self.generate_imgs(sentences, steps)
    self.make_video(images, lengths)
    return "finished_video.mp4"