PolyAI-pheme

Build error

File size: 1,893 Bytes

8b3b181
fcfc5d9
 
 
 
 
 
 
 
 
 
8b3b181
fcfc5d9
 
694ecc6
 
 
 
 
fcfc5d9
 
 
694ecc6
fcfc5d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f770b6
 
 
 
 
fcfc5d9
 
8b3b181
fcfc5d9
 
 
 
8b3b181
fcfc5d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694ecc6

"""Simple demo app.

Copyright PolyAI Limited.
"""
import time
from pathlib import Path

import gradio as gr

from transformer_infer import PhemeClient, parse_arguments


# Voice identifiers offered in the demo's voice dropdown, in display order.
VOICE_OPTIONS = [
    "male_voice",
    "POD1000000004_S0000246",
    "POD1000000018_S0000253",
    "POD1000000048_S0000035",
    "YOU1000000006_S0000051",
    "YOU1000000044_S0000798",
]

# Configuration for the client, obtained from the helper in transformer_infer.
# NOTE(review): presumably this parses command-line flags — confirm in
# transformer_infer before relying on it.
args = parse_arguments()

# Built once at import time; inference() uses this module-level instance for
# every request instead of constructing a new client per call.
model = PhemeClient(args)


def inference(
    text,
    voice,
    top_k,
    temperature
):
    """Synthesize speech for ``text`` with the selected ``voice``.

    Appends the request to ``PhemeVoice.log``, runs the module-level
    ``model``, prints how long the call took and yields the audio.

    Args:
        text: Text to synthesize; also written to the request log.
        voice: Voice identifier forwarded to ``model.infer``.
        top_k: Forwarded to ``model.infer`` as ``top_k``.
        temperature: Forwarded to ``model.infer`` as ``temperature``.

    Yields:
        A single ``(samplerate, data)`` tuple, where ``samplerate`` is
        16000 and ``data`` is whatever ``model.infer`` returned.
    """
    # The text is free-form user input. Pin the log encoding so non-ASCII
    # characters cannot raise UnicodeEncodeError under a non-UTF-8 locale
    # and abort the request before synthesis even starts.
    with open("PhemeVoice.log", "a", encoding="utf-8") as f:
        f.write(f"{voice}: {text} \n")

    # perf_counter() is monotonic, so the reported duration is not affected
    # by system clock adjustments the way time.time() differences are.
    start_time = time.perf_counter()

    data = model.infer(
        text, voice, top_k=top_k, temperature=temperature)
    samplerate = 16_000
    print("Time taken: ", time.perf_counter() - start_time)
    yield (samplerate, data)


def main():
    """Assemble the Gradio interface for the demo and launch it.

    Creates the text, voice, temperature and top-k inputs plus the audio
    output, wires them to ``inference`` and starts the queued app with
    ``share=True``.
    """
    # Spelled out piece by piece so the whitespace-only line and the
    # trailing indentation of the text stay visible in the source.
    blurb = (
        "Pheme Model can generate a variety of conversational voices in 16 kHz for phone-call applications.\n"
        "    \n"
        "    Paper: https://arxiv.org/pdf/2401.02839.pdf\n"
        "    Github: https://github.com/PolyAI-LDN/pheme\n"
        "    "
    )

    # Components are created in the same order as before (text, voice,
    # temperature, top-k, audio); only the local names differ.
    text_box = gr.Textbox(
        label="Text",
        lines=3,
        value="I gotta say, I never expect that to happened. Um I had some expectations but you know.",
    )
    voice_picker = gr.Dropdown(
        VOICE_OPTIONS,
        label="Select voice:",
        value="POD1000000048_S0000035",
        type="value",
    )
    temperature_slider = gr.Slider(
        minimum=0.3,
        maximum=1.5,
        value=0.7,
        step=0.05,
    )
    top_k_slider = gr.Slider(minimum=10, maximum=250, value=210)
    audio_player = gr.Audio(label="audio:", autoplay=True)

    # Input order must follow inference(text, voice, top_k, temperature).
    demo = gr.Interface(
        fn=inference,
        inputs=[text_box, voice_picker, top_k_slider, temperature_slider],
        outputs=[audio_player],
        title="Pheme",
        description=blurb,
    )
    demo.queue().launch(share=True)


if __name__ == "__main__":
    main()