import gradio as gr
import numpy as np
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer


# vits_model = load_model()
# vits_model.tts('Alo')

# Filesystem locations of the trained model's checkpoint and its config.
model_path = "/home/nam/code_python/train_vits/checkpoint_120000.pth"
config_path = "/home/nam/code_python/train_vits/config.json"

# Build the synthesizer once at import time; `predict` reuses this single
# instance for every request instead of reloading the checkpoint.
synthesizer = Synthesizer(model_path, config_path)

# def Vits_model():
    

#     tts = TTS(model_name = 'my_tts',
#               model_path=checkpoint_path,
#               config_path=config_path)

#     return tts

# vits_model = Vits_model()


# One-pass punctuation handling: "." becomes a space (so a word boundary
# survives between sentences); the other marks are removed outright.
_PUNCTUATION_TABLE = str.maketrans(".", " ", ",;:!?()")

# Sample rate reported only if the synthesizer does not expose its own.
_FALLBACK_SAMPLE_RATE = 16000


def predict(text: str):
    """Synthesize speech for ``text`` and return it for the audio output.

    The text is lower-cased and stripped of the punctuation marks
    ``. , ; : ! ? ( )`` before it is passed to the module-level
    ``synthesizer``.

    Args:
        text: Raw sentence(s) entered by the user.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is a NumPy
        array built from whatever ``synthesizer.tts`` returns.
    """
    cleaned = text.lower().translate(_PUNCTUATION_TABLE)
    waveform = np.asarray(synthesizer.tts(cleaned))

    # Report the rate the loaded model actually produces rather than a
    # hard-coded constant; a mismatch would play back at the wrong speed.
    sample_rate = getattr(
        synthesizer, "output_sample_rate", _FALLBACK_SAMPLE_RATE
    )
    return sample_rate, waveform

# Sample sentences offered as one-click examples in the web UI.
_EXAMPLE_SENTENCES = [
    "Sơn Tùng là ca sĩ nổi tiếng nhất Việt Nam.",
    "Tôi tên là Chu Văn Nam, đến từ Bắc Ninh",
    "Bác Hồ được biết đến không chỉ là một lãnh tụ xuất sắc mà còn là một nhà triết học, nhà cách mạng, và người tầm nhìn vĩ đại.",
]

# Text-in / audio-out interface around `predict`.
demo = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="audio",
    examples=_EXAMPLE_SENTENCES,
    theme="default",
)

# Start the web server as soon as the script is executed.
demo.launch(debug=False)