"""Gradio front end that turns a text story into a short video.

Importing this module loads the ``facebook/mms-tts-ind`` model and tokenizer,
builds the Gradio UI and launches it.
"""

import gc
import os

import gradio as gr
import torch
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, VitsModel, pipeline

from components import pexels, utils

# Model and tokenizer are loaded once at import time and handed to
# pexels.generate_videos on every request.
model = VitsModel.from_pretrained("facebook/mms-tts-ind")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-ind")

pexels_api_key = os.getenv('pexels_api_key')

# Labels shown in the "Video Dimension" dropdown. They are shared with pred()
# so the UI choices and the lookup below can never drift apart.
_PORTRAIT_LABEL = "Shorts/Reels/TikTok (1080 x 1920)"
_LANDSCAPE_LABEL = "Facebook/Youtube Videos (1920 x 1080)"
_SQUARE_LABEL = "Square (1080 x 1080)"

# label -> (orientation, height, width)
# NOTE(review): "potrait" is kept exactly as the original spelled it; the
# public Pexels API spells this value "portrait". Confirm what
# components.pexels expects before changing it.
_DIMENSIONS = {
    _PORTRAIT_LABEL: ("potrait", 1920, 1080),
    _LANDSCAPE_LABEL: ("landscape", 1080, 1920),
    # Label previously checked by pred(); still accepted so existing callers
    # that pass it keep getting landscape output.
    "Youtube Videos (1920 x 1080)": ("landscape", 1080, 1920),
}

# Used for the square label, for no selection (None) and for unknown labels.
_DEFAULT_DIMENSION = ("square", 1080, 1080)


def pred(product_name, orientation):
    """Generate the video for a story and return its captions and file path.

    Args:
        product_name: Text entered in the "text story" box; passed unchanged
            to ``pexels.generate_videos``.
        orientation: Label selected in the "Video Dimension" dropdown. Any
            value that is not a known portrait or landscape label (including
            ``None``) produces a square 1080 x 1080 video.

    Returns:
        A two-item list: the generated sentences joined with newlines, and
        the path ``<folder_name>/new_filename.mp4``.
    """
    orientation, height, width = _DIMENSIONS.get(orientation, _DEFAULT_DIMENSION)

    folder_name, sentences = pexels.generate_videos(
        product_name,
        pexels_api_key,
        orientation,
        height,
        width,
        model,
        tokenizer,
    )
    gc.collect()

    utils.combine_videos(folder_name)
    vid = os.path.join(folder_name, "Final_Ad_Video.mp4")
    spe = "x.wav"
    # Presumably writes new_filename.mp4 into folder_name, which is the file
    # returned below; verify against components.utils.
    utils.combine_audio_video(folder_name, vid, spe)

    return ["\n".join(sentences), os.path.join(folder_name, "new_filename.mp4")]


with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Content [Video] Generator
    Create a short video based on your text input using AI
    ### Note : the video generation takes about 2-4 minutes
    """
    )
    dimension = gr.Dropdown(
        [_PORTRAIT_LABEL, _LANDSCAPE_LABEL, _SQUARE_LABEL],
        label="Video Dimension",
        info="Choose dimension",
    )
    product_name = gr.Textbox(label="text story", lines=15, max_lines=100)
    captions = gr.Textbox(label="captions")
    video = gr.Video()
    btn = gr.Button("Submit")
    btn.click(pred, inputs=[product_name, dimension], outputs=[captions, video])

demo.launch()