"""Gradio demo that synthesizes speech by invoking the ``tts`` command-line tool."""

import os
import subprocess

import gradio as gr

# Path handed back to Gradio after synthesis. The command below passes no
# output path, so this presumably matches the tool's default output file in
# the current working directory -- TODO confirm against the installed tool.
_OUTPUT_WAV = "tts_output.wav"

_MODEL_NAME = "tts_models/multilingual/multi-dataset/your_tts"


def inference(text: str, audio: str) -> str:
    """Run the ``tts`` command on *text* using *audio* as the speaker sample.

    Args:
        text: The text to synthesize. Passed to the tool verbatim as one
            argument, so spaces and shell metacharacters are safe.
        audio: Value forwarded to ``--speaker_wav``. It must be a string
            (presumably a path to the uploaded audio file -- verify that the
            Gradio "audio" input of the installed version yields a file path).

    Returns:
        The path ``"tts_output.wav"``, which Gradio serves as the audio output.

    Raises:
        subprocess.CalledProcessError: If the ``tts`` command exits with a
            non-zero status, instead of silently returning a stale or missing
            output file.
        FileNotFoundError: If the ``tts`` executable is not on ``PATH``.
    """
    # Both values come from the web UI, so the command is built as an argument
    # list and executed without a shell: nothing the user types is interpreted
    # by a shell. The ``--opt=value`` form keeps a value that starts with "-"
    # from being parsed as a separate option.
    command = [
        "tts",
        f"--text={text}",
        "--model_name",
        _MODEL_NAME,
        f"--speaker_wav={audio}",
        "--language_idx",
        "en",
    ]
    subprocess.run(command, check=True)
    return _OUTPUT_WAV


gr.Interface(inference, ["text", "audio"], "audio").launch()