import os
import shutil
from io import BytesIO
from tempfile import NamedTemporaryFile

from gtts import gTTS
from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip, concatenate_videoclips


class AVCombiner:
    """Callable that builds a narrated slideshow video from images and texts."""

    # Seconds of silence shown before and after each narration.
    PADDING_SECONDS = 1
    # Frame rate assigned to every generated clip.
    FPS = 1

    def __call__(self, images, texts, output_path):
        """Create a video from `images` narrated by `texts` and save it.

        For each image, the text at the same index in `texts` is converted to
        speech with gTTS. The image is shown for the duration of that speech
        plus `PADDING_SECONDS` before and after it, and the resulting clips
        are concatenated in order.

        Args:
            images: Iterable of values accepted by moviepy's `ImageClip`.
            texts: Sequence indexed in step with `images`; `texts[i]` is
                spoken over the i-th image.
            output_path: Destination passed to `shutil.copy` (a file path,
                or a directory to copy the rendered file into).

        Returns:
            The path of the written video, as reported by `shutil.copy`.

        Raises:
            IndexError: If `texts` has fewer entries than `images`.
            ValueError: If no clip could be built (every pair failed or
                `images` is empty).
        """
        clips = []
        audio_clips = []
        temp_paths = []
        try:
            for i, image in enumerate(images):
                text = texts[i]
                try:
                    clips.append(
                        self._build_clip(image, text, audio_clips, temp_paths)
                    )
                except Exception as e:
                    # Best effort: one bad image/text pair must not abort
                    # the whole video, so report it and move on.
                    print(f"Skipping clip {i}: {e}")

            if not clips:
                raise ValueError("No clips could be created from the given images and texts")

            # concatenate all the video clips
            final_clip = concatenate_videoclips(clips)

            # Reserve a temporary file name, then close the handle so the
            # encoder is the only writer of that file.
            with NamedTemporaryFile(suffix=".mp4", delete=False) as video_tempfile:
                video_path = video_tempfile.name
            temp_paths.append(video_path)

            final_clip.write_videofile(video_path)

            # shutil.copy returns the path it actually wrote, which is
            # correct whether output_path names a file or a directory.
            return shutil.copy(video_path, output_path)
        finally:
            # The audio readers must stay open until the video is written,
            # and must be closed before their backing files are removed.
            for audio_clip in audio_clips:
                audio_clip.close()
            for path in temp_paths:
                try:
                    os.remove(path)
                except OSError:
                    # Cleanup is best effort; a leftover temp file must not
                    # mask the real result or error.
                    pass

    def _build_clip(self, image, text, audio_clips, temp_paths):
        """Return one video clip showing `image` while `text` is spoken.

        The created audio clip and its temporary MP3 path are appended to
        `audio_clips` and `temp_paths` so the caller can release them once
        the final video has been written.
        """
        # Synthesize the speech in memory first, so a failed request leaves
        # no temporary file behind.
        audio_bytes = BytesIO()
        gTTS(text=text).write_to_fp(audio_bytes)

        with NamedTemporaryFile(suffix=".mp3", delete=False) as audio_tempfile:
            temp_paths.append(audio_tempfile.name)
            audio_tempfile.write(audio_bytes.getvalue())

        # Open the audio only after the file is closed, so all bytes are
        # flushed to disk before it is read back.
        audio_clip = AudioFileClip(audio_tempfile.name)
        audio_clips.append(audio_clip)

        # Show the image for the narration plus padding on both sides.
        image_clip = ImageClip(image).set_duration(
            audio_clip.duration + 2 * self.PADDING_SECONDS
        )

        # Delay the narration so it starts after the leading padding.
        video_clip = CompositeVideoClip(
            [image_clip.set_audio(audio_clip.set_start(self.PADDING_SECONDS))]
        )
        video_clip.fps = self.FPS
        return video_clip