import base64
import os
import sys
from io import BytesIO

import gradio as gr
import torch
from fastapi import Request
from TTS.api import TTS
from TTS.utils.manage import ModelManager

# Startup diagnostics: dump the attributes of whatever `list_models()` returns
# so the available models are visible in the process log.
model_names = TTS().list_models()
print(model_names.__dict__)
print(model_names.__dir__())

os.environ["COQUI_TOS_AGREED"] = "1"

model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name, gpu=False)
# Use the GPU when one is present; fall back to CPU instead of crashing at
# import time on hosts without CUDA.
tts.to("cuda" if torch.cuda.is_available() else "cpu")


def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
    """Synthesize ``prompt`` in the voice of a reference recording.

    Args:
        prompt: Text to speak; must be between 2 and 10000 characters long.
        language: Language code forwarded to the model.
        audio_file_pth: Path of an uploaded reference audio file.
        mic_file_path: Path of a microphone recording; used as the reference
            only when ``use_mic`` is truthy and the path is non-empty.
        use_mic: Whether to prefer the microphone recording.
        agree: Whether the user accepted the Terms & Conditions.

    Returns:
        A 2-tuple. On success the first item is the result of
        ``gr.make_waveform`` for the generated ``output.wav`` and the second
        is the audio-player string. On a validation or device failure the
        first item is ``None`` and the second is a user-facing message.

    Raises:
        RuntimeError: Re-raised from synthesis unless its message contains
            ``"device-assert"``.
    """
    if not agree:
        return None, "Please accept the Terms & Conditions."

    speaker_wav = mic_file_path if use_mic and mic_file_path else audio_file_pth
    if not speaker_wav:
        return None, "Please provide a reference audio."

    if len(prompt) < 2:
        return None, "Please provide a longer text prompt."
    if len(prompt) > 10000:
        return (
            None,
            "Text length is limited to 10000 characters. Please try a shorter text.",
        )

    # Model-specific language adjustments, keyed off the loaded model's name.
    if language == "fr" and "your" in model_name:
        language = "fr-fr"
    if "/fr/" in model_name:
        language = None

    try:
        tts.tts_to_file(
            text=prompt,
            file_path="output.wav",
            speaker_wav=speaker_wav,
            language=language,
        )
    except RuntimeError as e:
        if "device-assert" in str(e):
            return None, "Runtime error encountered. Please try again later."
        raise

    with open("output.wav", "rb") as audio_file:
        audio = base64.b64encode(audio_file.read()).decode("utf-8")

    # NOTE(review): the player template is an empty string, so the base64
    # payload in `audio` is never embedded. The markup (presumably an HTML
    # audio element) appears to be missing -- confirm the intended template
    # before restoring it.
    audio_player = ""
    return gr.make_waveform(audio="output.wav"), audio_player