Liusuthu committed on
Commit 20ce06a · verified · 1 Parent(s): f076f86

Update app.py

Files changed (1)
  1. app.py +74 -0
app.py CHANGED
@@ -73,6 +73,47 @@ def classify_continuous(audio):
     return text_results, out_prob.squeeze(0).numpy(), text_lab[-1], Audio


+def speech_score(audio):
+    print(type(audio))
+    print(audio)
+    sample_rate, signal = audio  # the speech input from gr.Audio: (sample_rate, ndarray)
+    signal = signal.astype(np.float32)
+    signal /= np.max(np.abs(signal))  # peak-normalize to [-1, 1]
+    sf.write("data/a.wav", signal, sample_rate)
+    signal, sample_rate = torchaudio.load("data/a.wav")
+    signal1 = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(
+        signal
+    )
+    torchaudio.save("data/out.wav", signal1, 16000, encoding="PCM_S", bits_per_sample=16)
+    Audio = "data/out.wav"
+    speech, sample_rate = AudioReader.read_wav_file(Audio)
+    if signal is None:  # guard for empty input; was `signal == "none"`, which a tensor never equals
+        return "none", "none"  # two outputs expected (score2, text_emo); was a three-value return
+    else:
+        segments = vad.segments_offline(speech)
+        text_results = ""
+        for part in segments:
+            _result = ASR_model.infer_offline(
+                speech[part[0] * 16 : part[1] * 16], hot_words="任意热词 空格分开"
+            )  # the hot_words literal reads "any hot words, space-separated" (a placeholder)
+            text_results += punc.punctuate(_result)[0]
+
+        out_prob, score, index, text_lab = classifier.classify_batch(signal1)
+        print(type(out_prob.squeeze(0).numpy()))
+        print(out_prob.squeeze(0).numpy())
+        print(type(text_lab[-1]))
+        print(text_lab[-1])
+        # return text_results, out_prob.squeeze(0).numpy(), text_lab[-1], Audio
+        prob = out_prob.squeeze(0).numpy()
+        print(prob)
+        score2 = 10 * prob[0] - 10 * prob[1]  # was `10*prob2[0]`; `prob2` is undefined
+        print("score2", score2)
+        print(text_lab[-1])
+        text_emo = text_api(text_results)
+        print(text_emo)
+
+        return score2, text_emo
+
 ######################################### 视频部分 (video section) ###################################
 def clear_dynamic_info():
     return (
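A note on the scoring line: speech_score collapses the classifier's two class probabilities into one signed score, score2 = 10*prob[0] - 10*prob[1], which lives in [-10, 10]. A minimal worked example, treating index 0 as the positive class and index 1 as the negative one (an ordering this diff does not spell out):

    prob = [0.9, 0.1]                      # hypothetical classifier output
    score2 = 10 * prob[0] - 10 * prob[1]   # = 10*0.9 - 10*0.1 = 8.0

Equal probabilities give 0, and the score saturates at ±10 as the classifier becomes certain.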
@@ -205,8 +246,41 @@ with gr.Blocks() as video_all:
         inputs=[input_video],
         outputs=[score1,score2,result3],
     )
+###################################################################
+def clear_2():
+    return (
+        gr.Audio(value=None),
+        gr.Textbox(""),
+        gr.Textbox(""),
+    )
+

+with gr.Blocks() as speech_all:
+    with gr.Row():
+        with gr.Column(scale=2):
+            input_audio = gr.Audio()
+            with gr.Row():
+                clear_audio = gr.Button(
+                    value="Clear", interactive=True, scale=1
+                )
+                submit_audio = gr.Button(
+                    value="Score", interactive=True, scale=1, elem_classes="submit"
+                )
+        with gr.Column(scale=2):
+            score2 = gr.Textbox(interactive=False)
+            text_emo = gr.Textbox(interactive=False)
+
+    clear_audio.click(
+        fn=clear_2,  # was `fn=clear2`; the helper above is named clear_2
+        outputs=[input_audio, score2, text_emo]
+    )
+    submit_audio.click(
+        fn=speech_score,
+        inputs=[input_audio],
+        outputs=[score2, text_emo],
+    )

+

 with gr.Blocks() as app:
     with gr.Tab("语音"):
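The diff cuts off at the 语音 (speech) tab header, so how speech_all is mounted there is not shown. A minimal sketch of the likely wiring, mirroring how video_all presumably backs a video tab; the .render() calls, the 视频 tab name, and the launch line are assumptions, not part of this commit:

    with gr.Blocks() as app:
        with gr.Tab("语音"):          # speech tab
            speech_all.render()       # assumption: mount the new speech UI here
        with gr.Tab("视频"):          # video tab (assumed name)
            video_all.render()

    app.launch()                      # standard Gradio entry point (assumed)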