PolyAI-pheme / app.py
pfb30's picture
Update app.py
2f770b6
raw
history blame contribute delete
No virus
1.89 kB
"""Simple demo app.
Copyright PolyAI Limited.
"""
import time
from pathlib import Path
import gradio as gr
from transformer_infer import PhemeClient, parse_arguments
# Voice identifiers offered in the demo's "Select voice:" dropdown.
VOICE_OPTIONS = [
"male_voice",
"POD1000000004_S0000246",
"POD1000000018_S0000253",
"POD1000000048_S0000035",
"YOU1000000006_S0000051",
"YOU1000000044_S0000798",
]
# Created once at import time; `model` is shared by every inference() call.
# NOTE(review): parse_arguments() presumably reads command-line options - confirm
# against transformer_infer.
args = parse_arguments()
model = PhemeClient(args)
def inference(
    text,
    voice,
    top_k,
    temperature
):
    """Run the model on ``text`` with the selected ``voice``.

    The request is appended to ``PhemeVoice.log`` before inference, and the
    elapsed inference time is printed to stdout afterwards.

    Args:
        text: Input text, forwarded to ``model.infer``.
        voice: Voice identifier, forwarded to ``model.infer``.
        top_k: Forwarded to ``model.infer`` as ``top_k``.
        temperature: Forwarded to ``model.infer`` as ``temperature``.

    Yields:
        A single ``(samplerate, data)`` tuple where ``samplerate`` is 16000
        and ``data`` is the value returned by ``model.infer``.
    """
    # Explicit UTF-8 so that user text containing non-ASCII characters cannot
    # fail to encode on platforms whose default encoding is not UTF-8.
    with open("PhemeVoice.log", "a", encoding="utf-8") as f:
        f.write(f"{voice}: {text} \n")

    # perf_counter() is monotonic, so the measured duration is not affected
    # by system clock adjustments.
    start_time = time.perf_counter()

    data = model.infer(
        text, voice, top_k=top_k, temperature=temperature)
    samplerate = 16_000
    print("Time taken: ", time.perf_counter() - start_time)
    yield (samplerate, data)
def main():
    """Build the Pheme demo interface and launch it.

    Creates the text, voice, temperature and top-k input widgets plus an
    audio output, wires them to ``inference`` through a ``gr.Interface``,
    enables queuing and launches with ``share=True``.
    """
    demo_description = """Pheme Model can generate a variety of conversational voices in 16 kHz for phone-call applications.
Paper: https://arxiv.org/pdf/2401.02839.pdf
Github: https://github.com/PolyAI-LDN/pheme
"""

    # Input widgets, created in the same order as before.
    prompt_box = gr.Textbox(
        label="Text",
        value="I gotta say, I never expect that to happened. Um I had some expectations but you know.",
        lines=3,
    )
    voice_picker = gr.Dropdown(
        VOICE_OPTIONS,
        type="value",
        label="Select voice:",
        value="POD1000000048_S0000035",
    )
    temperature_slider = gr.Slider(
        step=0.05,
        value=0.7,
        minimum=0.3,
        maximum=1.5,
    )
    top_k_slider = gr.Slider(value=210, minimum=10, maximum=250)

    # Output widget.
    audio_player = gr.Audio(autoplay=True, label="audio:")

    # The order of `inputs` follows the positional parameters of inference():
    # text, voice, top_k, temperature.
    demo = gr.Interface(
        fn=inference,
        inputs=[prompt_box, voice_picker, top_k_slider, temperature_slider],
        outputs=[audio_player],
        title="Pheme",
        description=demo_description,
    )
    queued_demo = demo.queue()
    queued_demo.launch(share=True)
# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()