Spaces:
Build error
Build error
LPhilp1943
committed on
Commit
•
6ef1f34
1
Parent(s):
7cbb513
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import torchaudio
|
3 |
-
from speechbrain.pretrained import EncoderClassifier, Tacotron2, HIFIGAN, ASR
|
4 |
import os
|
5 |
import soundfile as sf
|
|
|
6 |
|
7 |
# Ensure output directory exists
|
8 |
os.makedirs("output_audio", exist_ok=True)
|
@@ -11,11 +11,14 @@ os.makedirs("output_audio", exist_ok=True)
|
|
11 |
encoder = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="models/encoder")
|
12 |
tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="models/tacotron2")
|
13 |
hifigan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="models/hifigan")
|
14 |
-
|
15 |
|
16 |
def speech_to_text(input_audio):
|
17 |
sig, sr = torchaudio.load(input_audio)
|
18 |
-
|
|
|
|
|
|
|
19 |
return transcription
|
20 |
|
21 |
def speech_to_speech(input_audio, target_text):
|
|
|
1 |
import gradio as gr
|
2 |
import torchaudio
|
|
|
3 |
import os
|
4 |
import soundfile as sf
|
5 |
+
from speechbrain.pretrained import EncoderClassifier, Tacotron2, HIFIGAN, EncoderDecoderASR
|
6 |
|
7 |
# Ensure output directory exists
|
8 |
os.makedirs("output_audio", exist_ok=True)
|
|
|
11 |
encoder = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="models/encoder")
|
12 |
tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="models/tacotron2")
|
13 |
hifigan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="models/hifigan")
|
14 |
+
asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-conformer-transformerlm-librispeech", savedir="models/asr")
|
15 |
|
16 |
def speech_to_text(input_audio):
    """Transcribe an audio file to text with the module-level ASR model.

    Parameters
    ----------
    input_audio : str
        Path to the audio file to transcribe.

    Returns
    -------
    str
        The transcription produced by ``asr_model``.
    """
    sig, sr = torchaudio.load(input_audio)
    # The ASR model expects 16 kHz input.
    if sr != 16000:
        sig = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(sig)
        # BUG FIX: the original code resampled `sig` but then transcribed the
        # unmodified file on disk, silently discarding the resampled audio.
        # Persist the 16 kHz signal and transcribe that file instead.
        resampled_path = os.path.join("output_audio", "resampled_input.wav")
        torchaudio.save(resampled_path, sig, 16000)
        input_audio = resampled_path
    transcription = asr_model.transcribe_file(input_audio)
    return transcription
|
23 |
|
24 |
def speech_to_speech(input_audio, target_text):
|