dragonSwing committed • Commit 04fea09
Parent: 839c791

Reverse requirements

Files changed:
- app.py: +4 −81
- requirements.txt: +0 −7
app.py CHANGED
@@ -1,84 +1,7 @@
 import gradio as gr
-import torch
-import zipfile
-from pyctcdecode import build_ctcdecoder
-from speechbrain.pretrained import EncoderASR
-from transformers.file_utils import cached_path, hf_bucket_url
 
-
-cache_dir = './cache/'  # assumed value; the original path is garbled in the source
-lm_file = hf_bucket_url("dragonSwing/wav2vec2-base-vn-270h", filename='4gram.zip')
-lm_file = cached_path(lm_file, cache_dir=cache_dir)
-with zipfile.ZipFile(lm_file, 'r') as zip_ref:
-    zip_ref.extractall(cache_dir)
-lm_file = cache_dir + 'lm.binary'
-vocab_file = cache_dir + 'vocab-260000.txt'
-model = EncoderASR.from_hparams(source="dragonSwing/wav2vec2-base-vn-270h",
-                                savedir="/content/pretrained2/"
-                                )
+def greet(name):
+    return "Hello " + name + "!!"
 
-
-def get_decoder_ngram_model(tokenizer, ngram_lm_path, vocab_path=None):
-    unigrams = None
-    if vocab_path is not None:
-        unigrams = []
-        with open(vocab_path, encoding='utf-8') as f:
-            for line in f:
-                unigrams.append(line.strip())
-
-    vocab_dict = tokenizer.get_vocab()
-    sort_vocab = sorted((value, key) for (key, value) in vocab_dict.items())
-    vocab = [x[1] for x in sort_vocab]
-    vocab_list = vocab
-
-    # convert ctc blank character representation
-    vocab_list[tokenizer.pad_token_id] = ""
-    # replace special characters
-    vocab_list[tokenizer.word_delimiter_token_id] = " "
-    # specify ctc blank char index, since conventionally it is the last entry of the logit matrix
-    decoder = build_ctcdecoder(vocab_list, ngram_lm_path, unigrams=unigrams)
-    return decoder
-
-
-ngram_lm_model = get_decoder_ngram_model(model.tokenizer, lm_file, vocab_file)
-
-
-def transcribe_file(path, max_seconds=20):
-    waveform = model.load_audio(path)
-    if max_seconds > 0:
-        waveform = waveform[:max_seconds*16000]
-    batch = waveform.unsqueeze(0)
-    rel_length = torch.tensor([1.0])
-    with torch.no_grad():
-        logits = model(batch, rel_length)
-    text_batch = [ngram_lm_model.decode(
-        logit.detach().cpu().numpy(), beam_width=500) for logit in logits]
-    return text_batch[0]
-
-
-def speech_recognize(file_upload, file_mic):
-    if file_upload is not None:
-        file = file_upload
-    elif file_mic is not None:
-        file = file_mic
-    else:
-        return ""
-    # text = model.transcribe_file(file)
-    text = transcribe_file(file)
-    return text
-
-
-inputs = [gr.inputs.Audio(source="upload", type='filepath', optional=True), gr.inputs.Audio(
-    source="microphone", type='filepath', optional=True)]
-outputs = gr.outputs.Textbox(label="Output Text")
-title = "wav2vec2-base-vietnamese-270h"
-description = "Gradio demo for a wav2vec2 base vietnamese speech recognition. To use it, simply upload your audio, click one of the examples to load them, or record from your own microphone. Read more at the links below. Currently supports 16_000hz audio files"
-article = "<p style='text-align: center'><a href='https://huggingface.co/dragonSwing/wav2vec2-base-vn-270h' target='_blank'>Pretrained model</a></p>"
-examples = [
-    ['example1.wav', 'example1.wav'],
-    ['example2.mp3', 'example2.mp3'],
-    ['example3.mp3', 'example3.mp3'],
-    ['example4.wav', 'example4.wav'],
-]
-gr.Interface(speech_recognize, inputs, outputs, title=title,
-             description=description, article=article, examples=examples).launch()
+iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+iface.launch()
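For readers skimming the removed code: get_decoder_ngram_model is the standard pyctcdecode recipe of sorting the wav2vec2 tokenizer vocabulary by token id, remapping the CTC blank and word-delimiter tokens, and attaching a KenLM binary. Below is a minimal standalone sketch of that recipe, assuming pyctcdecode and transformers are installed, a KenLM file exists at ./lm.binary, and an illustrative checkpoint name (not the one this Space used):

# Sketch of the n-gram decoder setup removed in this commit; names and paths
# marked "illustrative" or "assumed" are not from the original file.
from pyctcdecode import build_ctcdecoder
from transformers import Wav2Vec2Processor

processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")  # illustrative checkpoint
tokenizer = processor.tokenizer

# Labels must be ordered by token id so they align with the logit columns.
vocab = tokenizer.get_vocab()
labels = [tok for tok, _ in sorted(vocab.items(), key=lambda kv: kv[1])]
labels[tokenizer.pad_token_id] = ""               # CTC blank
labels[tokenizer.word_delimiter_token_id] = " "   # word boundary

decoder = build_ctcdecoder(labels, kenlm_model_path="./lm.binary")  # assumed LM path
# Usage: decoder.decode(logits), where logits is a (time, vocab) numpy array of
# per-frame scores from the acoustic model, as in transcribe_file above.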
requirements.txt CHANGED
@@ -1,7 +0,0 @@
-torch==1.9.0
-torchaudio==0.9.1
-transformers==4.12.5
-datasets==1.16.0
-pyctcdecode==0.2.0
-https://github.com/kpu/kenlm/archive/master.zip
-git+git://github.com/speechbrain/speechbrain.git@develop
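One caveat for anyone restoring these pins: GitHub disabled the unauthenticated git:// protocol in 2022, so the last requirement no longer installs as written; the https form of the same pin works:

git+https://github.com/speechbrain/speechbrain.git@develop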