# Spaces: Runtime error
# File size: 1,591 Bytes
# e01375e
import gradio as gr
import numpy as np
import io
import os
from openai import OpenAI
from pydub import AudioSegment
from pydub.playback import play
# Read the API key once and fail fast with a readable message when it is
# missing. Writing the result of os.environ.get() straight back into
# os.environ raises an opaque TypeError for an unset variable, because
# environment values must be strings and .get() returns None.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the environment before "
        "starting the app."
    )

# Shared client used by the request handler below.
client = OpenAI(api_key=_api_key)
def stream_and_yield_audio(text, model, voice):
    """Synthesize ``text`` with the OpenAI speech endpoint and yield the audio.

    The whole MP3 response is fetched and decoded before anything is
    yielded, so this generator produces exactly one item.

    Args:
        text: Text to convert to speech.
        model: Speech model name, e.g. ``"tts-1"``.
        voice: Voice name, e.g. ``"alloy"``.

    Yields:
        A ``(sample_rate, samples)`` tuple. ``samples`` is an ``int16`` NumPy
        array: 1-D for mono audio, ``(frames, channels)`` for multi-channel.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Reject blank input up front instead of sending it to the API.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    response = client.audio.speech.create(
        model=model,  # "tts-1", for example
        voice=voice,  # "alloy", for example
        input=text,
    )

    # Decode the MP3 bytes held in memory into a pydub segment.
    audio = AudioSegment.from_file(io.BytesIO(response.content), format="mp3")

    # The samples are exposed as int16 below; normalise first so wider or
    # narrower sample formats are converted rather than truncated by the cast.
    if audio.sample_width != 2:
        audio = audio.set_sample_width(2)

    samples = np.array(audio.get_array_of_samples(), dtype=np.int16)

    # pydub returns interleaved samples; split them into one column per
    # channel so multi-channel audio is not interpreted as a longer mono clip.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels))

    yield audio.frame_rate, samples
# Demo UI, written against an older Gradio release (3.50.2).
# NOTE(review): the nesting below (which widgets sit inside the Row) was
# reconstructed from statement order -- confirm against the intended layout.
with gr.Blocks() as demo:
    # Model and voice selectors share one row.
    with gr.Row():
        model = gr.Dropdown(choices=['tts-1', 'tts-1-hd'], label='Model', value='tts-1')
        voice = gr.Dropdown(
            choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
            label='Voice Options',
            value='alloy',
        )

    text = gr.Textbox(label="Input text")
    btn = gr.Button("Greet")
    output_audio = gr.Audio(label="Speech Output", streaming=True, autoplay=True)

    # Clicking the button runs the generator handler and feeds what it yields
    # to the audio component; the handler is also exposed as "tts-stream".
    btn.click(
        fn=stream_and_yield_audio,
        inputs=[text, model, voice],
        outputs=output_audio,
        api_name="tts-stream",
    )

# Enable the queue before launching the app.
demo.queue().launch()