jonathanjordan21's picture
Create Audio Video combination
7764d0a
raw
history blame
2.01 kB
from langchain.llms import HuggingFacePipeline
import torch
from components import pexels, utils
import os, gc
import gradio as gr
from transformers import VitsModel, AutoTokenizer, pipeline
import torch
# Text-to-speech tokenizer and model, loaded once at import time; pred() hands
# both to pexels.generate_videos. Both come from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-ind")
model = VitsModel.from_pretrained("facebook/mms-tts-ind")

# Pexels API key read from the environment; None when the variable is unset.
pexels_api_key = os.environ.get('pexels_api_key')
def pred(product_name, orientation):
    """Generate a video for the given text and return its captions and path.

    Args:
        product_name: Text entered in the UI; forwarded unchanged to
            ``pexels.generate_videos``.
        orientation: Label chosen in the "Video Dimension" dropdown. Any value
            that is not one of the recognised labels falls back to the
            1080 x 1080 square preset.

    Returns:
        A two-item list: the generated sentences joined with newlines, and the
        path ``<folder_name>/new_filename.mp4``.
    """
    if orientation == "Shorts/Reels/TikTok (1080 x 1920)":
        # NOTE(review): "potrait" looks like a misspelling of "portrait". It is
        # kept as-is because components.pexels is not visible from here and may
        # compare against this exact string - confirm before correcting.
        orientation, height, width = "potrait", 1920, 1080
    elif orientation in (
        "Facebook/Youtube Videos (1920 x 1080)",
        "Youtube Videos (1920 x 1080)",
    ):
        # The dropdown offers the "Facebook/Youtube ..." label, which the old
        # comparison against "Youtube Videos ..." never matched, so landscape
        # requests silently became square. The short label is still accepted
        # for any caller that passes it directly.
        orientation, height, width = "landscape", 1080, 1920
    else:
        orientation, height, width = "square", 1080, 1080

    folder_name, sentences = pexels.generate_videos(
        product_name, pexels_api_key, orientation, height, width, model, tokenizer
    )
    # Explicit collection between the generation and combination steps.
    gc.collect()

    utils.combine_videos(folder_name)

    # Presumably combine_videos writes Final_Ad_Video.mp4 into folder_name and
    # combine_audio_video writes new_filename.mp4 there - TODO confirm against
    # components.utils.
    vid = os.path.join(folder_name, "Final_Ad_Video.mp4")
    spe = "x.wav"
    utils.combine_audio_video(folder_name, vid, spe)

    return ["\n".join(sentences), os.path.join(folder_name, "new_filename.mp4")]
# UI layout: heading, dimension picker, story input, then the two outputs
# (captions and video) and a submit button wired to pred().
with gr.Blocks() as demo:
    gr.Markdown(
        value=(
            "\n"
            "# Content [Video] Generator\n"
            "Create a short video based on your text input using AI\n"
            "### Note : the video generation takes about 2-4 minutes\n"
        )
    )

    dimension = gr.Dropdown(
        choices=[
            "Shorts/Reels/TikTok (1080 x 1920)",
            "Facebook/Youtube Videos (1920 x 1080)",
            "Square (1080 x 1080)",
        ],
        label="Video Dimension",
        info="Choose dimension",
    )
    product_name = gr.Textbox(label="text story", lines=15, max_lines=100)

    captions = gr.Textbox(label="captions")
    video = gr.Video()

    btn = gr.Button(value="Submit")
    # pred receives (text, dimension label) and returns [captions, video path].
    btn.click(
        fn=pred,
        inputs=[product_name, dimension],
        outputs=[captions, video],
    )

demo.launch()