Spaces:
Running
Running
File size: 4,281 Bytes
b72ab63 09274b3 b72ab63 02cd175 72ff919 b72ab63 02cd175 b72ab63 72ff919 4c58375 b72ab63 09274b3 b72ab63 09274b3 b72ab63 8c070ea b72ab63 396e7de b72ab63 396e7de 72ff919 a65bab4 09274b3 396e7de b72ab63 02cd175 9c0d38a b72ab63 02cd175 b72ab63 8c070ea b72ab63 02cd175 72ff919 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import os
import torch
import argparse
import gradio as gr
import requests
import langid
from openvoice import se_extractor
from openvoice.api import BaseSpeakerTTS, ToneColorConverter
from dotenv import load_dotenv
from openai import OpenAI
from elevenlabs.client import ElevenLabs
from elevenlabs import play,save
load_dotenv()
# Module-level configuration. Everything here runs at import time; the helper
# functions and the Gradio UI defined further down read these names.

# Argument parsing
parser = argparse.ArgumentParser()
parser.add_argument("--share", action='store_true', default=False, help="make link public")
args = parser.parse_args()

# Read the ElevenLabs key once so the SDK client and the raw REST helpers
# (get_voices / delete_voice) are guaranteed to use the same credential.
# os.environ.get returns None when the variable is not set.
api_key = os.environ.get("ELEVENLABS_API_KEY")
client = ElevenLabs(api_key=api_key)

# Not referenced elsewhere in the visible part of this file.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Directory that predict() writes the synthesised audio into.
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)

# Not referenced elsewhere in the visible part of this file.
supported_languages = ['zh', 'en']
# Function to get all voices
def get_voices(api_key, timeout=30):
    """Fetch the voice list from the ElevenLabs ``/v1/voices`` endpoint.

    Args:
        api_key: Value sent in the ``xi-api-key`` request header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {"xi-api-key": api_key}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP errors here instead of handing an error payload to callers
    # that expect a voice listing.
    response.raise_for_status()
    return response.json()
# Function to delete a voice by ID
def delete_voice(api_key, voice_id, timeout=30):
    """Send a DELETE request for one voice to the ElevenLabs REST API.

    Args:
        api_key: Value sent in the ``xi-api-key`` request header.
        voice_id: Identifier interpolated into the ``/v1/voices/{voice_id}``
            URL path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``(status_code, text)`` tuple taken from the HTTP response. The
        status is returned rather than raised so the caller can report it.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    url = f"https://api.elevenlabs.io/v1/voices/{voice_id}"
    headers = {"xi-api-key": api_key}
    response = requests.delete(url, headers=headers, timeout=timeout)
    return response.status_code, response.text
def _delete_trial_voices(name="TrialVoice"):
    """Delete every voice on the account whose name equals *name*."""
    data = get_voices(api_key)
    # Find all voice IDs with the requested name; tolerate a payload that has
    # no 'voices' key so cleanup does not fail on an unexpected response.
    trial_voice_ids = [
        entry.get("voice_id")
        for entry in data.get("voices", [])
        if entry.get("name") == name
    ]
    # Delete each matching voice and report the HTTP outcome.
    for voice_id in trial_voice_ids:
        status_code, response_text = delete_voice(api_key, voice_id)
        print(f"Deleted voice ID {voice_id}: Status Code {status_code}, Response {response_text}")
    if not trial_voice_ids:
        print(f"No voices with the name '{name}' found.")


def predict(prompt, style, audio_file_pth):
    """Clone a voice from the reference audio and speak *prompt* with it.

    Args:
        prompt: Text to synthesise; must be between 2 and 200 characters.
        style: Value of the "Style" dropdown. Accepted to keep the UI
            callback signature, but not used by this function.
        audio_file_pth: File path of the reference audio used for cloning.

    Returns:
        A ``(text_hint, save_path, audio_file_pth)`` tuple. On a validation
        error ``text_hint`` carries an ``[ERROR]`` message and the other two
        items are ``None``; on success ``text_hint`` is empty and
        ``save_path`` points at the generated audio file.
    """
    text_hint = ''
    # `not prompt` also covers None, which would otherwise crash len().
    if not prompt or len(prompt) < 2:
        text_hint += "[ERROR] Please provide a longer prompt text.\n"
        return text_hint, None, None
    if len(prompt) > 200:
        text_hint += "[ERROR] Text length limited to 200 characters. Please try shorter text.\n"
        return text_hint, None, None
    # Without a reference file there is nothing to clone from; report it in
    # the UI instead of letting the SDK call fail.
    if not audio_file_pth:
        text_hint += "[ERROR] Please upload a reference audio file.\n"
        return text_hint, None, None

    print(audio_file_pth)
    save_path = f'{output_dir}/output.wav'
    try:
        voice = client.clone(
            name="TrialVoice",
            description="A trial voice model for testing",
            files=[audio_file_pth],
        )
        audio = client.generate(text=prompt, voice=voice)
        save(audio, save_path)
    finally:
        # Always remove the temporary cloned voice(s), even when generation
        # or saving fails, so they do not accumulate on the account.
        _delete_trial_voices()
    return text_hint, save_path, audio_file_pth
# Gradio interface setup

# Hide Gradio footer. The stylesheet must be defined before the Blocks app is
# built and handed to it; assigning it after launch() has no effect.
css = "footer {visibility: hidden}"

with gr.Blocks(theme=gr.themes.Glass(), css=css) as demo:
    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            input_text_gr = gr.Textbox(
                label="Text Prompt",
                info="One or two sentences at a time is better. Up to 200 text characters.",
                value="He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
            )
            # NOTE(review): the `info` text below talks about the reference
            # audio rather than the style, and predict() does not use the
            # selected style - confirm whether this is intended.
            style_gr = gr.Dropdown(
                label="Style",
                choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
                info="Please upload a reference audio file that is atleast 1 minute long. For best results, ensure the audio is clear. You can use Adobe Podcast Enhance(https://podcast.adobe.com/enhance) to improve the audio quality before uploading.",
                max_choices=1,
                value="default",
            )
            ref_gr = gr.Audio(
                label="Reference Audio",
                type="filepath",
                value="resources/demo_speaker2.mp3",
            )
            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
        # Right column: outputs, in the order predict() returns them.
        with gr.Column():
            out_text_gr = gr.Text(label="Info")
            audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
            ref_audio_gr = gr.Audio(label="Reference Audio Used")
    # Event listeners have to be registered inside the Blocks context.
    tts_button.click(predict, [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])

demo.queue()
demo.launch(debug=True, show_api=False, share=args.share)
|