import os
import shutil
from io import BytesIO
from tempfile import NamedTemporaryFile

import numpy as np
from gtts import gTTS
from moviepy.editor import (
    AudioFileClip,
    CompositeVideoClip,
    ImageClip,
    concatenate_videoclips,
)


class AVCombiner:
    """Build a narrated slideshow video from images and matching texts."""

    def __call__(self, images, texts, output_path):
        """Create a video from static images narrated by synthesized speech.

        Each image is paired with the text at the same index. The text is
        converted to speech with gTTS, and the image is shown for the length
        of that speech plus two seconds (the audio starts one second in, so
        there is one second of silence before and after it). The per-image
        clips are concatenated and written to
        ``<output_path>/final_video.mp4``.

        A pair whose clip cannot be built is reported with ``print`` and
        skipped, so one failure does not abort the whole video.

        Args:
            images: Iterable of images; each one is converted with
                ``np.array`` before being handed to ``ImageClip``.
            texts: Indexable collection of strings; ``texts[i]`` narrates the
                i-th image.
            output_path: Directory the video is written into. It is used as
                a plain string prefix, so it is assumed to already exist.

        Returns:
            The path of the written video, ``f"{output_path}/final_video.mp4"``.

        Raises:
            IndexError: If ``texts`` has fewer entries than ``images``.
            ValueError: If no clip could be built (no images were given, or
                every image/text pair failed).
        """
        video_path = f"{output_path}/final_video.mp4"
        clips = []
        # Tracked separately so they can be released even when a clip fails
        # half-way through construction.
        audio_clips = []
        temp_paths = []

        try:
            for i, image in enumerate(images):
                text = texts[i]
                try:
                    clips.append(
                        self._narrated_clip(image, text, audio_clips, temp_paths)
                    )
                except Exception as e:
                    # Best effort: report the failure and move on to the
                    # next image/text pair.
                    print(e)

            if not clips:
                raise ValueError(
                    "No video clips could be created from the given images and texts"
                )

            # concatenate all the video clips
            final_clip = concatenate_videoclips(clips)
            final_clip.write_videofile(video_path)
        finally:
            # Close the audio readers first: the temporary files cannot be
            # removed on some platforms while they are still open.
            for audio_clip in audio_clips:
                audio_clip.close()
            for temp_path in temp_paths:
                try:
                    os.remove(temp_path)
                except OSError:
                    # A leftover temp file must not mask the real outcome.
                    pass

        return video_path

    @staticmethod
    def _narrated_clip(image, text, audio_clips, temp_paths):
        """Return a clip showing `image` while `text` is spoken.

        The temporary mp3 path and the opened audio clip are appended to
        `temp_paths` and `audio_clips` as soon as they exist, so the caller
        can clean them up even if a later step raises.
        """
        # AudioFileClip needs a real file on disk, so the speech is written
        # to a named temporary file that outlives the `with` block.
        with NamedTemporaryFile(suffix=".mp3", delete=False) as audio_tempfile:
            temp_paths.append(audio_tempfile.name)
            gTTS(text=text).write_to_fp(audio_tempfile)
        # The file is closed (and therefore fully flushed) before it is read.
        audio_clip = AudioFileClip(audio_tempfile.name)
        audio_clips.append(audio_clip)

        # Show the image for the audio duration plus 2 seconds of padding.
        image_clip = ImageClip(np.array(image))
        image_clip = image_clip.set_duration(audio_clip.duration + 2)

        # Start the audio 1 second in, leaving 1 second of silence at each end.
        video_clip = CompositeVideoClip(
            [image_clip.set_audio(audio_clip.set_start(1))]
        )
        # The picture is static, so one frame per second is sufficient.
        video_clip.fps = 1
        return video_clip