# Videobook/Videobook.py
import os

from segmindapi import SD2_1, Kadinsky

from .TextPreprocessor import TextPreprocessor
from .AVCombiner import AVCombiner

# cv2, numpy, ffmpeg, and ceil are only needed by the commented-out
# gTTS/ffmpeg pipeline below (which would also need gtts.gTTS and
# mutagen.mp3.MP3); gradio is unused in this module.
# import cv2
# import numpy as np
# import ffmpeg
# from math import ceil
# import gradio as gr
class Videobook:

    def __init__(self):
        self.preprocessor = TextPreprocessor()
        self.combiner = AVCombiner()

    def get_sentences(self, story):
        # Returns (processed_sentences, sentences): prompt dicts for the
        # image generator plus the raw sentences for the combiner.
        return self.preprocessor(story)
    # Legacy gTTS audio path, superseded by AVCombiner:
    # def generate_voice(self, story, sentences, path = 'tts.mp3'):
    #     for i, n in enumerate(sentences):
    #         tts = gTTS(n, lang = 'en')
    #         tts.save('tts' + str(i) + '.mp3')
    #     lgth = []
    #     for i in range(len(sentences)):
    #         lgth.append(MP3('tts' + str(i) + '.mp3').info.length)
    #         os.remove(os.path.join(os.getcwd(), 'tts' + str(i) + '.mp3'))
    #     tts = gTTS(story, lang = 'en')
    #     tts.save(path)
    #     return lgth
    def generate_imgs(self, sentences, steps):
        # Each processed sentence is a dict holding a 'pos' prompt and a
        # 'neg' (negative) prompt; wrap the positive prompt with the
        # chosen style and tags before sending it to the backend.
        # (The original overwrote the dict with a concatenated string and
        # then indexed it, which would fail at runtime.)
        imgs = []
        for sentence in sentences:
            prompt = self.style + ' of ' + sentence['pos'] + ', ' + self.tags
            imgs.append(self.pipe.generate(prompt = prompt,
                                           negative_prompt = sentence['neg'],
                                           num_inference_steps = steps))
        return imgs
    # Legacy OpenCV/ffmpeg video assembly, superseded by AVCombiner:
    # def addBuffer(self, imgs, lgth):
    #     imgs_buff = []
    #     for i, img in enumerate(imgs):
    #         for j in range(ceil(lgth[i] * self.fps)):
    #             imgs_buff.append(img)
    #     return imgs_buff

    # def imgs_to_video(self, imgs, video_name = 'video.mp4'):
    #     video_dims = (imgs[0].width, imgs[0].height)
    #     fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    #     video = cv2.VideoWriter(video_name, fourcc, self.fps, video_dims)
    #     for img in imgs:
    #         tmp_img = img.copy()
    #         video.write(cv2.cvtColor(np.array(tmp_img), cv2.COLOR_RGB2BGR))
    #     video.release()

    # def make_video(self, imgs, lengths, video_name = "finished_video.mp4"):
    #     self.imgs_to_video(self.addBuffer(imgs, lengths), 'test_video.mp4')
    #     input_audio = ffmpeg.input(os.path.join(os.getcwd(), 'tts.mp3'))
    #     input_video = ffmpeg.input(os.path.join(os.getcwd(), 'test_video.mp4'))
    #     ffmpeg.concat(input_video, input_audio, v=1, a=1).output(video_name).run(overwrite_output=True)
    def generate(self, story, api_key, fps, style, tags, model, steps):
        self.fps = fps
        self.style = style
        self.tags = tags
        # Pick the Segmind backend for image generation.
        if model == "Stable Diffusion v2.1":
            self.pipe = SD2_1(api_key)
        else:
            self.pipe = Kadinsky(api_key)
        processed_sentences, sentences = self.get_sentences(story)
        # Reuse the combiner constructed in __init__ instead of building
        # a fresh AVCombiner on every call.
        return self.combiner(self.generate_imgs(processed_sentences, steps),
                             sentences, os.getcwd())
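
# A minimal usage sketch, not part of the original app: the story, fps,
# style, tags, and the SEGMIND_API_KEY environment variable below are
# illustrative assumptions. Because this module uses relative imports,
# run it as a package module (python -m Videobook.Videobook).
if __name__ == "__main__":
    book = Videobook()
    result = book.generate(
        story = "A fox sets out at dawn. It crosses a frozen river.",
        api_key = os.environ.get("SEGMIND_API_KEY"),
        fps = 24,
        style = "watercolor illustration",
        tags = "storybook, soft lighting",
        model = "Stable Diffusion v2.1",
        steps = 30,
    )
    # AVCombiner's return value is assumed to point at the finished video.
    print(result)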