Spaces:
Runtime error
Runtime error
File size: 2,416 Bytes
99b9405 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import os
import cv2
import numpy as np
from gtts import gTTS
from mutagen.mp3 import MP3
import nltk
import ffmpeg
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
from math import ceil
from segmindapi import SD2_1, Kadinsky
import gradio as gr
class Videobook:
    """Turn a text story into a narrated slideshow video.

    The story is split into sentences; each sentence gets one generated
    image, and that image is repeated for as many frames as the sentence's
    text-to-speech clip lasts. The frames are then combined with the
    narration of the full story.

    Attributes assigned by ``generate`` and read by the other methods:
        fps: frame rate used for frame counts and for the video writer.
        style: text placed before each sentence in the image prompt.
        tags: text placed after each sentence in the image prompt.
        pipe: image generation client (``SD2_1`` or ``Kadinsky``).
    """

    def get_sentences(self, story):
        """Split ``story`` into sentences with NLTK's ``sent_tokenize``."""
        return sent_tokenize(story)

    def generate_voice(self, story, sentences, path='tts.mp3'):
        """Synthesise the narration and measure each sentence's duration.

        Each sentence is rendered to a temporary ``tts<i>.mp3`` in the
        current working directory only so its length can be read; the
        temporary file is deleted again even if synthesis or parsing fails.
        Afterwards the whole story is rendered to ``path``.

        Args:
            story: full text to narrate.
            sentences: the sentences of ``story``, in order.
            path: destination of the full narration MP3.

        Returns:
            A list with one ``MP3(...).info.length`` value per sentence
            (``addBuffer`` multiplies these by ``fps`` to get frame counts).
        """
        lgth = []
        for i, sentence in enumerate(sentences):
            clip_path = os.path.join(os.getcwd(), 'tts' + str(i) + '.mp3')
            try:
                gTTS(sentence, lang='en').save(clip_path)
                lgth.append(MP3(clip_path).info.length)
            finally:
                # Clean up the scratch clip on success and on failure alike;
                # it may not exist if gTTS failed before writing anything.
                try:
                    os.remove(clip_path)
                except FileNotFoundError:
                    pass
        gTTS(story, lang='en').save(path)
        return lgth

    def generate_imgs(self, sentences, steps):
        """Generate one image per sentence with ``self.pipe``.

        The prompt for a sentence is ``"<style> of <sentence>, <tags>"``.

        Args:
            sentences: sentences to illustrate.
            steps: forwarded to the pipe as ``num_inference_steps``.

        Returns:
            A list of whatever ``self.pipe.generate`` returns, in the same
            order as ``sentences``.
        """
        return [
            self.pipe.generate(
                self.style + ' of ' + sentence + ', ' + self.tags,
                num_inference_steps=steps,
            )
            for sentence in sentences
        ]

    def addBuffer(self, imgs, lgth):
        """Repeat each image so it spans its sentence's duration.

        Image ``i`` is repeated ``ceil(lgth[i] * self.fps)`` times. The
        repeats are references to the same object, not copies.

        Args:
            imgs: one image per sentence.
            lgth: per-sentence durations, indexed like ``imgs``.

        Returns:
            The flat list of frames in playback order.
        """
        imgs_buff = []
        for i, img in enumerate(imgs):
            imgs_buff.extend([img] * ceil(lgth[i] * self.fps))
        return imgs_buff

    def imgs_to_video(self, imgs, video_name='video.mp4'):
        """Write ``imgs`` as a silent video at ``self.fps``.

        The frame size is taken from the first image's ``width`` and
        ``height``. Each frame is converted to an array and from RGB to BGR
        channel order before being handed to the OpenCV writer.

        Args:
            imgs: frames in playback order; must not be empty.
            video_name: output file path.

        Raises:
            ValueError: if ``imgs`` is empty.
        """
        if not imgs:
            raise ValueError("imgs_to_video needs at least one frame")

        video_dims = (imgs[0].width, imgs[0].height)
        fourcc = cv2.VideoWriter_fourcc(*'DIVX')
        video = cv2.VideoWriter(video_name, fourcc, self.fps, video_dims)
        try:
            last_img = None
            frame = None
            for img in imgs:
                # addBuffer repeats the same object many times in a row, so
                # convert only when the source image actually changes.
                if img is not last_img:
                    frame = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                    last_img = img
                video.write(frame)
        finally:
            # Always release the writer so the file handle is not leaked.
            video.release()

    def make_video(self, imgs, lengths, video_name="finished_video.mp4"):
        """Render the frames and combine them with the narration audio.

        Writes the silent slideshow to ``test_video.mp4`` and then joins it
        with ``tts.mp3`` (both in the current working directory) into
        ``video_name``, overwriting any existing output file.

        Args:
            imgs: one image per sentence.
            lengths: per-sentence durations, indexed like ``imgs``.
            video_name: path of the final video.
        """
        self.imgs_to_video(self.addBuffer(imgs, lengths), 'test_video.mp4')
        cwd = os.getcwd()
        input_audio = ffmpeg.input(os.path.join(cwd, 'tts.mp3'))
        input_video = ffmpeg.input(os.path.join(cwd, 'test_video.mp4'))
        joined = ffmpeg.concat(input_video, input_audio, v=1, a=1)
        joined.output(video_name).run(overwrite_output=True)

    def generate(self, story, api_key, fps, style, tags, model, steps):
        """Run the full pipeline and return the path of the finished video.

        Args:
            story: text to narrate and illustrate.
            api_key: key passed to the image generation client.
            fps: frame rate of the video.
            style: text placed before each sentence in the image prompt.
            tags: text placed after each sentence in the image prompt.
            model: ``"Stable Diffusion v2.1"`` selects ``SD2_1``; any other
                value selects ``Kadinsky``.
            steps: number of inference steps per image.

        Returns:
            The file name of the rendered video.
        """
        self.fps = fps
        self.style = style
        self.tags = tags
        if model == "Stable Diffusion v2.1":
            self.pipe = SD2_1(api_key)
        else:
            self.pipe = Kadinsky(api_key)

        sentences = self.get_sentences(story)
        lengths = self.generate_voice(story, sentences)
        images = self.generate_imgs(sentences, steps)

        # Use one name for both rendering and the return value so they
        # cannot drift apart.
        video_name = "finished_video.mp4"
        self.make_video(images, lengths, video_name)
        return video_name