Porjaz committed (verified)
Commit 8218447 · Parent(s): 19502c3

Update app.py

Files changed (1):
  1. app.py +65 -30
app.py CHANGED
@@ -29,18 +29,11 @@ def recap_sentence(string):
     return recap_result
 
 
-def return_prediction_w2v2(mic=None, file=None, progress=gr.Progress(), device=device):
+def return_prediction_w2v2_mic(mic=None, progress=gr.Progress(), device=device):
     if mic is not None:
         download_path = mic.split(".")[0] + ".txt"
         waveform, sr = librosa.load(mic, sr=16000)
-        # waveform = waveform[:60*sr]
         w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
-    elif file is not None:
-        download_path = file.split(".")[0] + ".txt"
-        waveform, sr = librosa.load(file, sr=16000)
-        # waveform = waveform[:60*sr]
-        w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
-    else:
         return "You must either provide a mic recording or a file"
 
     recap_result = ""
@@ -76,33 +69,49 @@ def return_prediction_w2v2(mic=None, file=None, progress=gr.Progress(), device=d
     return recap_result, download_path
 
 
-
-def return_prediction_whisper(mic=None, file=None, device=device):
-    if mic is not None:
-        waveform, sr = librosa.load(mic, sr=16000)
-        waveform = waveform[:30*sr]
-        whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
-    elif file is not None:
+def return_prediction_w2v2_file(file=None, progress=gr.Progress(), device=device):
+    if file is not None:
+        download_path = file.split(".")[0] + ".txt"
         waveform, sr = librosa.load(file, sr=16000)
-        waveform = waveform[:30*sr]
-        whisper_result = whisper_classifier.classify_file_whisper_mkd(waveform, device)
-    else:
+        w2v2_result = w2v2_classifier.classify_file_w2v2(waveform, device)
         return "You must either provide a mic recording or a file"
 
-    recap_result = recap_sentence(whisper_result[0])
+    recap_result = ""
+    prev_segment = ""
+    prev_segment_len = 0
 
-    # If the letter after punct is small, recap it
-    for i, letter in enumerate(recap_result):
-        if i > 1 and recap_result[i-2] in [".", "!", "?"] and letter.islower():
-            recap_result = recap_result[:i] + letter.upper() + recap_result[i+1:]
+    for k, segment in enumerate(w2v2_result):
+        progress(0.75, desc=" Пост-процесирање на транскриптот")
 
-    clean_up_memory()
-    return recap_result
+        if prev_segment == "":
+            recap_segment = recap_sentence(segment)
+        else:
+            prev_segment_len = len(prev_segment.split())
+            recap_segment = recap_sentence(prev_segment + " " + segment)
+            # remove prev_segment from the beginning of the recap_result
+            recap_segment = recap_segment.split()
+            recap_segment = recap_segment[prev_segment_len:]
+            recap_segment = " ".join(recap_segment)
+        prev_segment = segment[0]
+        recap_result += recap_segment + " "
 
+    # If the letter after punct is small, recap it
+    for i, letter in enumerate(recap_result):
+        if i > 1 and recap_result[i-2] in [".", "!", "?"] and letter.islower():
+            recap_result = recap_result[:i] + letter.upper() + recap_result[i+1:]
+
+    clean_up_memory()
+
+    progress(1.0, desc=" Крај на транскрипцијата")
+    with open(download_path, "w") as f:
+        f.write(recap_result)
+
+    return recap_result, download_path
 
 
 # Create a partial function with the device pre-applied
-return_prediction_w2v2_with_device = partial(return_prediction_w2v2, device=device)
+return_prediction_w2v2_mic_with_device = partial(return_prediction_w2v2_mic, device=device)
+return_prediction_w2v2_file_with_device = partial(return_prediction_w2v2_file, device=device)
 
 
 # Load the ASR models
@@ -123,6 +132,32 @@ with gr.Blocks() as mic_transcribe_wav2vec2:
     def clear_outputs():
         return {audio_input: None, output_text: "", download_file: None}
 
+    with gr.Row():
+        audio_input = gr.Audio(sources="microphone", type="filepath", label="Record Audio")
+    with gr.Row():
+        transcribe_button = gr.Button("Transcribe")
+        clear_button = gr.Button("Clear")
+    with gr.Row():
+        output_text = gr.Textbox(label="Transcription")
+    with gr.Row():
+        download_file = gr.File(label="Зачувај го транскриптот", file_count="single")
+
+    transcribe_button.click(
+        fn=return_prediction_w2v2_mic_with_device,
+        inputs=[audio_input],
+        outputs=[output_text, download_file],
+    )
+    clear_button.click(
+        fn=clear_outputs,
+        inputs=[],
+        outputs=[audio_input, output_text, download_file],
+    )
+
+
+with gr.Blocks() as file_transcribe_wav2vec2:
+    def clear_outputs():
+        return {audio_input: None, output_text: "", download_file: None}
+
     with gr.Row():
         audio_input = gr.Audio(sources="upload", type="filepath", label="Record Audio")
     with gr.Row():
@@ -131,10 +166,10 @@ with gr.Blocks() as mic_transcribe_wav2vec2:
     with gr.Row():
         output_text = gr.Textbox(label="Transcription")
     with gr.Row():
-        download_file = gr.File(label="Зачувај го транскриптот", file_count="single", height=50)
+        download_file = gr.File(label="Зачувај го транскриптот", file_count="single")
 
     transcribe_button.click(
-        fn=return_prediction_w2v2_with_device,
+        fn=return_prediction_w2v2_file_with_device,
         inputs=[audio_input],
         outputs=[output_text, download_file],
     )
@@ -214,8 +249,8 @@ with transcriber_app:
     # state = gr.State(value=[], delete_callback=lambda v: print("STATE DELETED"))
 
     gr.TabbedInterface(
-        [mic_transcribe_wav2vec2],
-        ["Буки-W2v2 транскрипција"],
+        [mic_transcribe_wav2vec2, file_transcribe_wav2vec2],
+        ["Буки-w2v2 транскрипција од микрофон", "Буки-w2v2 транскрипција од фајл"],
    )
     state = gr.State(value=[], delete_callback=lambda v: print("STATE DELETED"))
 
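
For reference, the capitalization pass that both new functions run after recap_sentence simply upper-cases any lowercase letter that follows sentence-final punctuation. The standalone sketch below reproduces that loop outside the Gradio app; the helper name capitalize_after_punct and the sample sentence are illustrative only and do not appear in the repository.

    # Standalone sketch of the post-processing loop from app.py; the function
    # name and the example sentence are invented for illustration.
    def capitalize_after_punct(text: str) -> str:
        # Whenever the character two positions back is ".", "!" or "?"
        # (i.e. the pattern "X. y"), upper-case the current lowercase letter.
        for i, letter in enumerate(text):
            if i > 1 and text[i - 2] in [".", "!", "?"] and letter.islower():
                text = text[:i] + letter.upper() + text[i + 1:]
        return text

    if __name__ == "__main__":
        print(capitalize_after_punct("здраво. како си? добро сум."))
        # -> здраво. Како си? Добро сум.

Rebinding the string inside the enumerate loop is safe here because only letter case changes: the iterator keeps walking the original characters, the string length is unchanged, and the punctuation check at index i-2 stays aligned with the rebuilt string.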