# Spaces: Paused
import os
import re
import shutil
import subprocess

import gradio as gr
from gradio import Dropdown
from scipy.io.wavfile import write
from TTS.api import TTS
# Upper bound on how many sentences of a prompt are synthesized per request.
MAX_NUMBER_SENTENCES = 10

# Last value selected in the prompt dropdown; written by update_positive_prompt.
user_choice = ""

# Raw value of the ALLOW_FILE_UPLOAD environment variable (None when unset).
file_upload_available = os.getenv("ALLOW_FILE_UPLOAD")

# Bark model, loaded once at import time with gpu=True.
tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
def split_process(audio, chosen_out_track):
    """Write the sample to ``test.wav``, run Demucs on it and return one track.

    Args:
        audio: Two-item sequence; ``audio[0]`` and ``audio[1]`` are handed to
            ``scipy.io.wavfile.write`` as its rate and data arguments.
        chosen_out_track: ``"vocals"``, ``"bass"``, ``"drums"``, ``"other"``
            or ``"all-in"`` (the unseparated ``test.wav``).

    Returns:
        Path of the requested track as a string.

    Raises:
        ValueError: If ``chosen_out_track`` is not one of the names above.
        subprocess.CalledProcessError: If the Demucs command exits non-zero.
    """
    track_paths = {
        "vocals": "./out/mdx_extra_q/test/vocals.wav",
        "bass": "./out/mdx_extra_q/test/bass.wav",
        "drums": "./out/mdx_extra_q/test/drums.wav",
        "other": "./out/mdx_extra_q/test/other.wav",
        "all-in": "test.wav",
    }
    # Reject unknown names up front instead of running the separation and
    # then handing the caller an implicit None.
    if chosen_out_track not in track_paths:
        raise ValueError(
            f"Unknown track {chosen_out_track!r}; "
            f"expected one of {sorted(track_paths)}"
        )

    gr.Info("Cleaning your audio sample...")
    os.makedirs("out", exist_ok=True)
    write('test.wav', audio[0], audio[1])

    # Argument list (no shell) and check=True so a failed separation surfaces
    # here rather than as a missing file later on.
    subprocess.run(
        [
            "python3", "-m", "demucs.separate",
            "-n", "mdx_extra_q",
            "-j", "4",
            "test.wav",
            "-o", "out",
        ],
        check=True,
    )

    if chosen_out_track == "vocals":
        print("Audio sample cleaned")
    return track_paths[chosen_out_track]
def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
    """Register the recorded sample as a Bark voice and speak ``prompt`` with it.

    Args:
        prompt: Text to synthesize. Only the first ``MAX_NUMBER_SENTENCES``
            sentences are kept.
        input_wav_file: Path of the recorded voice sample.
        clean_audio: When exactly ``True``, the vocals are isolated with
            ``split_process`` before the sample is registered.
        hidden_numpy_audio: Audio data forwarded to ``split_process`` when
            cleaning is requested.

    Returns:
        A 5-tuple: the path ``"output.wav"``, the waveform video produced by
        ``gr.make_waveform``, an update for the ``.npz`` file component, a
        ``gr.Group`` visibility update, and the voice directory path.

    Raises:
        gr.Error: If the prompt or the voice sample is missing, or cleaning is
            requested without audio data.
    """
    print("""
—————
NEW INFERENCE:
———————
""")
    # An empty dropdown yields no text; stop here instead of failing later
    # inside re.split or the model.
    if not prompt:
        raise gr.Error("Do not forget to provide a tts prompt !")
    if not input_wav_file:
        raise gr.Error("Please record a voice sample before submitting.")

    if clean_audio is True:
        print("We want to clean audio sample")
        new_name = os.path.splitext(os.path.basename(input_wav_file))[0]
        cleaned_dir = os.path.join("bark_voices", f"{new_name}_cleaned")
        if os.path.exists(cleaned_dir):
            print("This file has already been cleaned")
            source_path = os.path.join(cleaned_dir, f"{new_name}_cleaned.wav")
        else:
            if hidden_numpy_audio is None:
                raise gr.Error("No audio data available to clean.")
            source_path = split_process(hidden_numpy_audio, "vocals")
            # Rename the separated track after the recording it came from.
            new_path = os.path.join(
                os.path.dirname(source_path), f"{new_name}_cleaned.wav")
            os.rename(source_path, new_path)
            source_path = new_path
    else:
        source_path = input_wav_file

    # Each voice lives in bark_voices/<name>/<name>.wav.
    destination_directory = "bark_voices"
    file_name = os.path.splitext(os.path.basename(source_path))[0]
    destination_path = os.path.join(destination_directory, file_name)
    os.makedirs(destination_path, exist_ok=True)
    shutil.move(source_path, os.path.join(
        destination_path, f"{file_name}.wav"))

    # Split after sentence-ending punctuation and cap the sentence count.
    sentences = re.split(r'(?<=[.!?])\s+', prompt)
    if len(sentences) > MAX_NUMBER_SENTENCES:
        gr.Info(f"Your text is too long. To keep this demo enjoyable for everyone, we only kept the first {MAX_NUMBER_SENTENCES} sentences :) Duplicate this space and set MAX_NUMBER_SENTENCES for longer texts ;)")
        prompt = ' '.join(sentences[:MAX_NUMBER_SENTENCES])

    gr.Info("Generating audio from prompt")
    tts.tts_to_file(text=prompt,
                    file_path="output.wav",
                    voice_dir="bark_voices/",
                    speaker=f"{file_name}")

    contents = os.listdir(destination_path)
    for item in contents:
        print(item)

    # os.listdir order is arbitrary, so pick the .npz by extension rather
    # than by position. NOTE(review): assumes the model writes a .npz file
    # into the voice directory - confirm against the TTS Bark implementation.
    npz_name = next(
        (item for item in contents if item.endswith(".npz")), None)
    if npz_name is None:
        npz_update = gr.update(visible=False)
    else:
        npz_update = gr.update(
            value=f"bark_voices/{file_name}/{npz_name}", visible=True)

    print("Preparing final waveform video ...")
    tts_video = gr.make_waveform(audio="output.wav")
    print(tts_video)
    print("FINISHED")
    return "output.wav", tts_video, npz_update, gr.Group.update(visible=True), destination_path
# Negative-sentiment sentences offered in the prompt dropdown.
prompt_choices = [
    "I am very displeased with the progress being made to finish the cross-town transit line. This has been an embarrassing use of taxpayer dollars.",
    "Yes, John is my friend, but He was never at my house watching the baseball game.",
    "We are expecting a double digit increase in profits by the end of the fiscal year.",
    "Hi Grandma, Just calling to ask for money, or I can't see you over the holidays. ",
]

# Maps each dropdown sentence to the positive counterpart shown to the user.
positive_prompts = {
    prompt_choices[0]: "I am very pleased with the progress being made to finish the cross-town transit line. This has been an excellent use of taxpayer dollars.",
    prompt_choices[1]: "Yes, John is my friend. He was at my house watching the baseball game all night.",
    prompt_choices[2]: "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
    prompt_choices[3]: "Hi Grandma it’s me, Just calling to say I love you, and I can’t wait to see you over the holidays.",
}
# Module-level dropdown listing the predefined sentences. The name `prompt`
# is bound again to another dropdown inside the Blocks layout further down.
prompt = Dropdown(
    choices=prompt_choices,
    elem_id="tts-prompt",
    label="Text to speech prompt",
)
# Stylesheet passed to gr.Blocks(css=...): page container width, link style,
# microphone button and record icon sizing, a spin animation, and the
# share-button container.
css = """
#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.mic-wrap > button {
    width: 100%;
    height: 60px;
    font-size: 1.4em!important;
}
.record-icon.svelte-1thnwz {
    display: flex;
    position: relative;
    margin-right: var(--size-2);
    width: unset;
    height: unset;
}
span.record-icon > span.dot.svelte-1thnwz {
    width: 20px!important;
    height: 20px!important;
}
.animate-spin {
    animation: spin 1s linear infinite;
}
@keyframes spin {
    from {
        transform: rotate(0deg);
    }
    to {
        transform: rotate(360deg);
    }
}
#share-btn-container {
    display: flex;
    padding-left: 0.5rem !important;
    padding-right: 0.5rem !important;
    background-color: #000000;
    justify-content: center;
    align-items: center;
    border-radius: 9999px !important;
    max-width: 15rem;
    height: 36px;
}
"""
def load_hidden_mic(audio_in):
    """Log that a new sample was recorded and return ``audio_in`` unchanged."""
    recorded_sample = audio_in
    print("USER RECORDED A NEW SAMPLE")
    return recorded_sample
def update_positive_prompt(prompt_value):
    """Remember the selected prompt and return its positive counterpart.

    Stores ``prompt_value`` in the module-level ``user_choice`` and looks it
    up in ``positive_prompts``; returns ``None`` when there is no entry.
    """
    global user_choice
    user_choice = prompt_value
    return positive_prompts.get(prompt_value)
# Build the interface: prompt selection and voice recording on the left,
# generated results on the right.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Dropdown(
                    label="Negative Speech Prompt",
                    choices=prompt_choices,
                    elem_id="tts-prompt"
                )
                texts_samples = gr.Textbox(
                    label="Positive prompts",
                    info="Please read out this prompt 5 times to generate a good sample",
                    value="",
                    lines=5,
                    elem_id="texts_samples"
                )
                # Show the positive counterpart whenever the selection changes.
                prompt.change(fn=update_positive_prompt,
                              inputs=prompt, outputs=texts_samples)
                # Microphone recording of the voice to clone.
                micro_in = gr.Audio(
                    label="Record voice to clone",
                    type="filepath",
                    source="microphone",
                    interactive=True
                )
                # Opt-in flag feeding infer's `clean_audio` parameter.
                clean_sample = gr.Checkbox(
                    label="Clean sample ?", value=False)
                # Hidden copy of the recording, exposed to infer as numpy data.
                hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
                submit_btn = gr.Button("Submit")
            with gr.Column():
                cloned_out = gr.Audio(
                    label="Text to speech output", visible=False)
                video_out = gr.Video(label="Waveform video",
                                     elem_id="voice-video-out")
                # infer returns a gr.Group visibility update, so it needs a
                # Group component to land on; this one wraps the .npz file.
                with gr.Group(visible=False) as result_group:
                    npz_file = gr.File(label=".npz file", visible=False)
                folder_path = gr.Textbox(visible=False)

    micro_in.stop_recording(fn=load_hidden_mic, inputs=[micro_in], outputs=[
        hidden_audio_numpy], queue=False)

    # infer takes four arguments and returns five values; the component lists
    # below follow that order one-to-one.
    submit_btn.click(
        fn=infer,
        inputs=[
            prompt,
            micro_in,
            clean_sample,
            hidden_audio_numpy
        ],
        outputs=[
            cloned_out,
            video_out,
            npz_file,
            result_group,
            folder_path
        ]
    )

demo.queue(api_open=False, max_size=10).launch()