Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -73,6 +73,47 @@ def classify_continuous(audio):
|
|
73 |
return text_results, out_prob.squeeze(0).numpy(), text_lab[-1], Audio
|
74 |
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
#########################################视频部分###################################
|
77 |
def clear_dynamic_info():
|
78 |
return (
|
@@ -205,8 +246,41 @@ with gr.Blocks() as video_all:
|
|
205 |
inputs=[input_video],
|
206 |
outputs=[score1,score2,result3],
|
207 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
|
|
210 |
|
211 |
with gr.Blocks() as app:
|
212 |
with gr.Tab("语音"):
|
|
|
73 |
return text_results, out_prob.squeeze(0).numpy(), text_lab[-1], Audio
|
74 |
|
75 |
|
76 |
+
def speech_score(audio):
    """Transcribe a recorded clip and score its emotional valence.

    Parameters:
        audio: Gradio ``Audio`` value — a ``(sample_rate, samples)`` tuple
            where ``samples`` is a NumPy array of raw PCM data.

    Returns:
        tuple: ``(score2, text_emo)`` — a numeric valence score derived from
        the acoustic classifier's probabilities, and the result of the
        text-emotion API applied to the ASR transcript.
    """
    print(type(audio))
    print(audio)
    sample_rate, signal = audio  # speech input from the gr.Audio widget
    # Normalize to float32 in [-1, 1] before writing to disk.
    signal = signal.astype(np.float32)
    signal /= np.max(np.abs(signal))
    sf.write("data/a.wav", signal, sample_rate)
    signal, sample_rate = torchaudio.load("data/a.wav")
    # Resample to 16 kHz, the rate the downstream ASR/classifier expect.
    signal1 = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(
        signal
    )
    torchaudio.save("data/out.wav", signal1, 16000, encoding="PCM_S", bits_per_sample=16)
    Audio = "data/out.wav"
    speech, sample_rate = AudioReader.read_wav_file(Audio)
    # NOTE(review): `signal` is a tensor at this point, so it can never equal
    # the string "none" — this guard looks vestigial; confirm intent.
    if signal == "none":
        return "none", "none", "haha"
    else:
        # VAD first, then ASR + punctuation restoration per speech segment.
        segments = vad.segments_offline(speech)
        text_results = ""
        for part in segments:
            _result = ASR_model.infer_offline(
                speech[part[0] * 16 : part[1] * 16], hot_words="任意热词 空格分开"
            )
            text_results += punc.punctuate(_result)[0]

        out_prob, score, index, text_lab = classifier.classify_batch(signal1)
        print(type(out_prob.squeeze(0).numpy()))
        print(out_prob.squeeze(0).numpy())
        print(type(text_lab[-1]))
        print(text_lab[-1])
        prob = out_prob.squeeze(0).numpy()
        print(prob)
        # BUG FIX: the original read `prob2[0]`, but `prob2` is never defined
        # anywhere — a guaranteed NameError on every submission. The intended
        # value is the probability vector computed just above.
        score2 = 10 * prob[0] - 10 * prob[1]
        print("score2", score2)
        print(text_lab[-1])
        text_emo = text_api(text_results)
        print(text_emo)

        return score2, text_emo
|
116 |
+
|
117 |
#########################################视频部分###################################
|
118 |
def clear_dynamic_info():
|
119 |
return (
|
|
|
246 |
inputs=[input_video],
|
247 |
outputs=[score1,score2,result3],
|
248 |
)
|
249 |
+
###################################################################
|
250 |
+
def clear_2():
    """Reset the speech tab: empty the audio input and both result boxes."""
    cleared_audio = gr.Audio(value=None)
    cleared_score = gr.Textbox("")
    cleared_emotion = gr.Textbox("")
    return cleared_audio, cleared_score, cleared_emotion
|
256 |
+
|
257 |
|
258 |
+
# Speech tab layout: audio input + Clear/Score buttons on the left,
# the numeric score and text-emotion result boxes on the right.
with gr.Blocks() as speech_all:
    with gr.Row():
        with gr.Column(scale=2):
            input_audio = gr.Audio()
            with gr.Row():
                clear_audio = gr.Button(
                    value="Clear", interactive=True, scale=1
                )
                submit_audio = gr.Button(
                    value="Score", interactive=True, scale=1, elem_classes="submit"
                )
        with gr.Column(scale=2):
            score2 = gr.Textbox(interactive=False)
            text_emo = gr.Textbox(interactive=False)

    # BUG FIX: the original passed the undefined name `clear2` here, which
    # raises NameError while the Blocks graph is being built; the reset
    # helper defined in this file is `clear_2`.
    clear_audio.click(
        fn=clear_2,
        outputs=[input_audio, score2, text_emo],
    )
    submit_audio.click(
        fn=speech_score,
        inputs=[input_audio],
        outputs=[score2, text_emo],
    )
|
282 |
|
283 |
+
|
284 |
|
285 |
with gr.Blocks() as app:
|
286 |
with gr.Tab("语音"):
|