Spaces:
Runtime error
Runtime error
Fix Spanish ASR
Browse files
- app.py +8 -4
- requirements.txt +2 -1
app.py
CHANGED
@@ -10,7 +10,7 @@ from stt import Model
|
|
10 |
|
11 |
import torch
|
12 |
from transformers import pipeline
|
13 |
-
|
14 |
import torchaudio
|
15 |
from speechbrain.pretrained import EncoderClassifier
|
16 |
|
@@ -43,20 +43,24 @@ def client(audio_data: np.array, sample_rate: int, default_lang: str):
|
|
43 |
|
44 |
output_audio.seek(0)
|
45 |
fin = wave.open(output_audio, 'rb')
|
46 |
-
|
|
|
|
|
|
|
47 |
|
48 |
fin.close()
|
49 |
print(default_lang, text_lab)
|
50 |
|
51 |
if text_lab == 'Spanish':
|
52 |
text_lab = 'español'
|
|
|
53 |
asr_pipeline = STT_MODELS['español']
|
54 |
-
result = asr_pipeline(
|
55 |
|
56 |
else:
|
57 |
text_lab = default_lang
|
58 |
ds = STT_MODELS[default_lang]
|
59 |
-
result = ds.stt(
|
60 |
|
61 |
return f"{text_lab}: {result}"
|
62 |
|
|
|
10 |
|
11 |
import torch
|
12 |
from transformers import pipeline
|
13 |
+
import librosa
|
14 |
import torchaudio
|
15 |
from speechbrain.pretrained import EncoderClassifier
|
16 |
|
|
|
43 |
|
44 |
output_audio.seek(0)
|
45 |
fin = wave.open(output_audio, 'rb')
|
46 |
+
coqui_audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
|
47 |
+
|
48 |
+
output_audio.seek(0)
|
49 |
+
hf_audio, _ = librosa.load(output_audio)
|
50 |
|
51 |
fin.close()
|
52 |
print(default_lang, text_lab)
|
53 |
|
54 |
if text_lab == 'Spanish':
|
55 |
text_lab = 'español'
|
56 |
+
|
57 |
asr_pipeline = STT_MODELS['español']
|
58 |
+
result = asr_pipeline(hf_audio, chunk_length_s=5, stride_length_s=1)['text']
|
59 |
|
60 |
else:
|
61 |
text_lab = default_lang
|
62 |
ds = STT_MODELS[default_lang]
|
63 |
+
result = ds.stt(coqui_audio)
|
64 |
|
65 |
return f"{text_lab}: {result}"
|
66 |
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ STT==1.0.0
|
|
3 |
pydub==0.25.1
|
4 |
speechbrain==0.5.10
|
5 |
torchaudio
|
6 |
-
transformers
|
|
|
|
3 |
pydub==0.25.1
|
4 |
speechbrain==0.5.10
|
5 |
torchaudio
|
6 |
+
transformers
|
7 |
+
librosa
|