Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -29,18 +29,11 @@ def recap_sentence(string):
|
|
29 |
return recap_result
|
30 |
|
31 |
|
32 |
-
def
|
33 |
if mic is not None:
|
34 |
download_path = mic.split(".")[0] + ".txt"
|
35 |
waveform, sr = librosa.load(mic, sr=16000)
|
36 |
-
# waveform = waveform[:60*sr]
|
37 |
w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
|
38 |
-
elif file is not None:
|
39 |
-
download_path = file.split(".")[0] + ".txt"
|
40 |
-
waveform, sr = librosa.load(file, sr=16000)
|
41 |
-
# waveform = waveform[:60*sr]
|
42 |
-
w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
|
43 |
-
else:
|
44 |
return "You must either provide a mic recording or a file"
|
45 |
|
46 |
recap_result = ""
|
@@ -76,33 +69,49 @@ def return_prediction_w2v2(mic=None, file=None, progress=gr.Progress(), device=d
|
|
76 |
return recap_result, download_path
|
77 |
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
waveform, sr = librosa.load(mic, sr=16000)
|
83 |
-
waveform = waveform[:30*sr]
|
84 |
-
whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
|
85 |
-
elif file is not None:
|
86 |
waveform, sr = librosa.load(file, sr=16000)
|
87 |
-
|
88 |
-
whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
|
89 |
-
else:
|
90 |
return "You must either provide a mic recording or a file"
|
91 |
|
92 |
-
recap_result =
|
|
|
|
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
if i > 1 and recap_result[i-2] in [".", "!", "?"] and letter.islower():
|
97 |
-
recap_result = recap_result[:i] + letter.upper() + recap_result[i+1:]
|
98 |
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
|
104 |
# Create a partial function with the device pre-applied
|
105 |
-
|
|
|
106 |
|
107 |
|
108 |
# Load the ASR models
|
@@ -123,6 +132,32 @@ with gr.Blocks() as mic_transcribe_wav2vec2:
|
|
123 |
def clear_outputs():
|
124 |
return {audio_input: None, output_text: "", download_file: None}
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
with gr.Row():
|
127 |
audio_input = gr.Audio(sources="upload", type="filepath", label="Record Audio")
|
128 |
with gr.Row():
|
@@ -131,10 +166,10 @@ with gr.Blocks() as mic_transcribe_wav2vec2:
|
|
131 |
with gr.Row():
|
132 |
output_text = gr.Textbox(label="Transcription")
|
133 |
with gr.Row():
|
134 |
-
download_file = gr.File(label="Зачувај го транскриптот", file_count="single"
|
135 |
|
136 |
transcribe_button.click(
|
137 |
-
fn=
|
138 |
inputs=[audio_input],
|
139 |
outputs=[output_text, download_file],
|
140 |
)
|
@@ -214,8 +249,8 @@ with transcriber_app:
|
|
214 |
# state = gr.State(value=[], delete_callback=lambda v: print("STATE DELETED"))
|
215 |
|
216 |
gr.TabbedInterface(
|
217 |
-
[mic_transcribe_wav2vec2],
|
218 |
-
["Буки-
|
219 |
)
|
220 |
state = gr.State(value=[], delete_callback=lambda v: print("STATE DELETED"))
|
221 |
|
|
|
29 |
return recap_result
|
30 |
|
31 |
|
32 |
+
def return_prediction_w2v2_mic(mic=None, progress=gr.Progress(), device=device):
|
33 |
if mic is not None:
|
34 |
download_path = mic.split(".")[0] + ".txt"
|
35 |
waveform, sr = librosa.load(mic, sr=16000)
|
|
|
36 |
w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
return "You must either provide a mic recording or a file"
|
38 |
|
39 |
recap_result = ""
|
|
|
69 |
return recap_result, download_path
|
70 |
|
71 |
|
72 |
+
def return_prediction_w2v2_file(file=None, progress=gr.Progress(), device=device):
|
73 |
+
if file is not None:
|
74 |
+
download_path = file.split(".")[0] + ".txt"
|
|
|
|
|
|
|
|
|
75 |
waveform, sr = librosa.load(file, sr=16000)
|
76 |
+
w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
|
|
|
|
|
77 |
return "You must either provide a mic recording or a file"
|
78 |
|
79 |
+
recap_result = ""
|
80 |
+
prev_segment = ""
|
81 |
+
prev_segment_len = 0
|
82 |
|
83 |
+
for k, segment in enumerate(w2v2_result):
|
84 |
+
progress(0.75, desc=" Пост-процесирање на транскриптот")
|
|
|
|
|
85 |
|
86 |
+
if prev_segment == "":
|
87 |
+
recap_segment= recap_sentence(segment)
|
88 |
+
else:
|
89 |
+
prev_segment_len = len(prev_segment.split())
|
90 |
+
recap_segment = recap_sentence(prev_segment + " " + segment)
|
91 |
+
# remove prev_segment from the beginning of the recap_result
|
92 |
+
recap_segment = recap_segment.split()
|
93 |
+
recap_segment = recap_segment[prev_segment_len:]
|
94 |
+
recap_segment = " ".join(recap_segment)
|
95 |
+
prev_segment = segment[0]
|
96 |
+
recap_result += recap_segment + " "
|
97 |
|
98 |
+
# If the letter after punct is small, recap it
|
99 |
+
for i, letter in enumerate(recap_result):
|
100 |
+
if i > 1 and recap_result[i-2] in [".", "!", "?"] and letter.islower():
|
101 |
+
recap_result = recap_result[:i] + letter.upper() + recap_result[i+1:]
|
102 |
+
|
103 |
+
clean_up_memory()
|
104 |
+
|
105 |
+
progress(1.0, desc=" Крај на транскрипцијата")
|
106 |
+
with open(download_path, "w") as f:
|
107 |
+
f.write(recap_result)
|
108 |
+
|
109 |
+
return recap_result, download_path
|
110 |
|
111 |
|
112 |
# Create a partial function with the device pre-applied
|
113 |
+
return_prediction_w2v2_mic_with_device = partial(return_prediction_w2v2_mic, device=device)
|
114 |
+
return_prediction_w2v2_file_with_device = partial(return_prediction_w2v2_file, device=device)
|
115 |
|
116 |
|
117 |
# Load the ASR models
|
|
|
132 |
def clear_outputs():
|
133 |
return {audio_input: None, output_text: "", download_file: None}
|
134 |
|
135 |
+
with gr.Row():
|
136 |
+
audio_input = gr.Audio(sources="microphone", type="filepath", label="Record Audio")
|
137 |
+
with gr.Row():
|
138 |
+
transcribe_button = gr.Button("Transcribe")
|
139 |
+
clear_button = gr.Button("Clear")
|
140 |
+
with gr.Row():
|
141 |
+
output_text = gr.Textbox(label="Transcription")
|
142 |
+
with gr.Row():
|
143 |
+
download_file = gr.File(label="Зачувај го транскриптот", file_count="single")
|
144 |
+
|
145 |
+
transcribe_button.click(
|
146 |
+
fn=return_prediction_w2v2_mic_with_device,
|
147 |
+
inputs=[audio_input],
|
148 |
+
outputs=[output_text, download_file],
|
149 |
+
)
|
150 |
+
clear_button.click(
|
151 |
+
fn=clear_outputs,
|
152 |
+
inputs=[],
|
153 |
+
outputs=[audio_input, output_text, download_file],
|
154 |
+
)
|
155 |
+
|
156 |
+
|
157 |
+
with gr.Blocks() as file_transcribe_wav2vec2:
|
158 |
+
def clear_outputs():
|
159 |
+
return {audio_input: None, output_text: "", download_file: None}
|
160 |
+
|
161 |
with gr.Row():
|
162 |
audio_input = gr.Audio(sources="upload", type="filepath", label="Record Audio")
|
163 |
with gr.Row():
|
|
|
166 |
with gr.Row():
|
167 |
output_text = gr.Textbox(label="Transcription")
|
168 |
with gr.Row():
|
169 |
+
download_file = gr.File(label="Зачувај го транскриптот", file_count="single")
|
170 |
|
171 |
transcribe_button.click(
|
172 |
+
fn=return_prediction_w2v2_file_with_device,
|
173 |
inputs=[audio_input],
|
174 |
outputs=[output_text, download_file],
|
175 |
)
|
|
|
249 |
# state = gr.State(value=[], delete_callback=lambda v: print("STATE DELETED"))
|
250 |
|
251 |
gr.TabbedInterface(
|
252 |
+
[mic_transcribe_wav2vec2, file_transcribe_wav2vec2],
|
253 |
+
["Буки-w2v2 транскрипција од микрофон", "Буки-w2v2 транскрипција од фајл"],
|
254 |
)
|
255 |
state = gr.State(value=[], delete_callback=lambda v: print("STATE DELETED"))
|
256 |
|