Spaces:
Build error
Build error
LPhilp1943
committed on
Commit
•
ff5cf26
1
Parent(s):
7ca94c8
Update app.py
Browse files
app.py
CHANGED
@@ -24,27 +24,23 @@ def speech_to_text(input_audio):
|
|
24 |
transcription = asr_processor.batch_decode(predicted_ids)[0]
|
25 |
return transcription.strip()
|
26 |
|
27 |
-
def text_to_speech(text
|
28 |
text = text.lower().translate(str.maketrans('', '', string.punctuation))
|
29 |
inputs = tts_tokenizer(text, return_tensors="pt")
|
30 |
with torch.no_grad():
|
31 |
output = tts_model(**inputs).waveform
|
32 |
waveform = output.numpy().squeeze()
|
33 |
output_path = f"output_audio/{text[:10].replace(' ', '_')}_to_speech.wav"
|
34 |
-
sf.write(output_path, waveform,
|
35 |
return output_path
|
36 |
|
37 |
-
def speech_to_speech(input_audio
|
38 |
transcription = speech_to_text(input_audio)
|
39 |
-
return text_to_speech(transcription
|
40 |
|
41 |
iface = gr.Interface(
|
42 |
fn=speech_to_speech,
|
43 |
-
inputs=
|
44 |
-
gr.Audio(type="filepath", label="Input Audio"),
|
45 |
-
gr.Textbox(label="Target Text"),
|
46 |
-
gr.Slider(minimum=16000, maximum=48000, step=1000, value=22050, label="Sample Rate")
|
47 |
-
],
|
48 |
outputs=gr.Audio(label="Synthesized Speech"),
|
49 |
title="Speech Processing Application",
|
50 |
description="This app uses Facebook's Wav2Vec 2.0 for speech-to-text and VITS for text-to-speech."
|
|
|
24 |
transcription = asr_processor.batch_decode(predicted_ids)[0]
|
25 |
return transcription.strip()
|
26 |
|
27 |
+
def text_to_speech(text):
|
28 |
text = text.lower().translate(str.maketrans('', '', string.punctuation))
|
29 |
inputs = tts_tokenizer(text, return_tensors="pt")
|
30 |
with torch.no_grad():
|
31 |
output = tts_model(**inputs).waveform
|
32 |
waveform = output.numpy().squeeze()
|
33 |
output_path = f"output_audio/{text[:10].replace(' ', '_')}_to_speech.wav"
|
34 |
+
sf.write(output_path, waveform, 22050) # Use a fixed sample rate for TTS output
|
35 |
return output_path
|
36 |
|
37 |
+
def speech_to_speech(input_audio):
|
38 |
transcription = speech_to_text(input_audio)
|
39 |
+
return text_to_speech(transcription)
|
40 |
|
41 |
iface = gr.Interface(
|
42 |
fn=speech_to_speech,
|
43 |
+
inputs=gr.Audio(type="filepath", label="Input Audio"),
|
|
|
|
|
|
|
|
|
44 |
outputs=gr.Audio(label="Synthesized Speech"),
|
45 |
title="Speech Processing Application",
|
46 |
description="This app uses Facebook's Wav2Vec 2.0 for speech-to-text and VITS for text-to-speech."
|