Liusuthu committed on
Commit 0329c7d · verified · 1 Parent(s): 51fa915

Update consult.py

Files changed (1)
  1. consult.py +126 -0
consult.py CHANGED
@@ -36,6 +36,132 @@ os.environ["no_proxy"] = "localhost,127.0.0.1,::1"
  client = Client("Liusuthu/TextDepression")
 
 
+ # load the local SpeechBrain wav2vec2 emotion classifier through its custom interface
+ classifier = foreign_class(
+     source="pretrained_models/local-speechbrain/emotion-recognition-wav2vec2-IEMOCAP",  # ".\\emotion-recognition-wav2vec2-IEMOCAP"
+     pymodule_file="custom_interface.py",
+     classname="CustomEncoderWav2vec2Classifier",
+     savedir="pretrained_models/local-speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
+ )
+ ASR_model = ParaformerOffline()  # offline Paraformer speech recognition
+ vad = FSMNVad()                  # FSMN voice activity detection
+ punc = CttPunctuator()           # punctuation restoration for the ASR output
+
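The stack above pairs a SpeechBrain emotion classifier with what look like FunASR-derived components (Paraformer ASR, FSMN VAD, CT-Transformer punctuation). As a quick standalone check, the loaded classifier can be run directly on a WAV file; a minimal sketch, assuming the classify_file method that SpeechBrain's emotion-recognition-wav2vec2-IEMOCAP custom interface exposes:

    # hypothetical smoke test, not part of the commit
    out_prob, score, index, text_lab = classifier.classify_file("data/out.wav")
    print(text_lab)  # e.g. ['neu'] under the IEMOCAP label set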
+ def text_api(text: str):
+     # query the Liusuthu/TextDepression Space for a sentiment prediction
+     result = client.predict(
+         text,  # str, value for the '输入文字' (input text) Textbox component
+         api_name="/predict",
+     )
+     return result
+
+
+ def get_text_score(text):
+     # pull the label and probability out of the Space's string response by
+     # slicing at fixed offsets around the "text" and "probability" keys
+     string = text_api(text)
+     part1 = string.partition("text")
+     want1 = part1[2]
+     label = want1[4:6]  # the two-character label, 正向 or 负向
+     part2 = string.partition("probability")
+     want2 = part2[2]
+     prob = float(want2[3:-4])
+     return label, prob
+
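The fixed-offset slicing above assumes the Space always answers with a string shaped roughly like {'text': '正向', 'probability': 0.98} (an assumption inferred from the offsets, not confirmed by the commit). A regex-based parse would tolerate spacing and quoting changes; a minimal sketch with a hypothetical helper:

    import re

    def parse_text_result(raw: str):
        # hypothetical helper, not part of the commit
        label_m = re.search(r"text\W+(正向|负向)", raw)
        prob_m = re.search(r"probability\W+([0-9.]+)", raw)
        if label_m is None or prob_m is None:
            raise ValueError(f"unexpected response: {raw!r}")
        return label_m.group(1), float(prob_m.group(1))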
+ def classify_continuous(audio):
+     print(type(audio))
+     print(audio)
+     sample_rate, signal = audio  # the speech input: a (sample_rate, ndarray) tuple
+     # peak-normalize, then round-trip through a 16 kHz, 16-bit PCM WAV file
+     signal = signal.astype(np.float32)
+     signal /= np.max(np.abs(signal))
+     sf.write("data/a.wav", signal, sample_rate)
+     signal, sample_rate = torchaudio.load("data/a.wav")
+     signal1 = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(
+         signal
+     )
+     torchaudio.save("data/out.wav", signal1, 16000, encoding="PCM_S", bits_per_sample=16)
+     Audio = "data/out.wav"
+     speech, sample_rate = AudioReader.read_wav_file(Audio)
+     if signal == "none":  # note: signal is a tensor here, so this guard looks like leftover input validation
+         return "none", "none", "haha"
+     else:
+         # VAD first, then offline ASR segment by segment, then restore punctuation
+         segments = vad.segments_offline(speech)
+         text_results = ""
+         for part in segments:
+             _result = ASR_model.infer_offline(
+                 speech[part[0] * 16 : part[1] * 16], hot_words="任意热词 空格分开"  # arbitrary hot words, space-separated
+             )
+             text_results += punc.punctuate(_result)[0]
+
+         out_prob, score, index, text_lab = classifier.classify_batch(signal1)
+         print(type(out_prob.squeeze(0).numpy()))
+         print(out_prob.squeeze(0).numpy())
+         print(type(text_lab[-1]))
+         print(text_lab[-1])
+         return text_results, out_prob.squeeze(0).numpy(), text_lab[-1], Audio
+
+
+
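classify_continuous consumes the (sample_rate, ndarray) tuple that a Gradio numpy-typed Audio component produces and returns the transcript, the emotion probabilities, the emotion label, and the resampled file. A minimal wiring sketch, assuming a Gradio 4.x API; component names here are illustrative, not from the commit:

    import gradio as gr

    # hypothetical wiring, not part of the commit
    def run(audio):
        text, probs, label, wav_path = classify_continuous(audio)
        return text, str(probs), label, wav_path

    with gr.Blocks() as demo:
        mic = gr.Audio(sources=["microphone"], type="numpy", label="录音 (record)")
        btn = gr.Button("分析 (analyze)")
        asr_text = gr.Textbox(label="ASR transcript")
        probs_box = gr.Textbox(label="emotion probabilities")
        emo = gr.Textbox(label="emotion label")
        wav = gr.Audio(label="resampled audio")
        btn.click(run, inputs=mic, outputs=[asr_text, probs_box, emo, wav])

    demo.launch()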
+ #######################################################################
+ # functions specific to question 4 (第四题专用函数):
+ def text_score4(text):
+     # same response parsing as get_text_score, then map sentiment to a signed
+     # score: a positive (正向) reading subtracts from the depression score
+     string = text_api(text)
+     part1 = string.partition("text")
+     want1 = part1[2]
+     label = want1[4:6]
+     part2 = string.partition("probability")
+     want2 = part2[2]
+     prob = float(want2[3:-4])
+     if label == "正向":
+         score = -prob * 10
+     else:
+         score = prob * 10
+     return text, score, gr.Column(visible=True)
+
+
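Under this mapping, a response parsed as 正向 with probability 0.85 yields score = -8.5, while 负向 with the same probability yields +8.5, so the text-side score always falls in [-10, 10].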
+ def speech_score4(audio):
+     print(type(audio))
+     print(audio)
+     sample_rate, signal = audio  # the speech input: a (sample_rate, ndarray) tuple
+     signal = signal.astype(np.float32)
+     signal /= np.max(np.abs(signal))
+     sf.write("data/a.wav", signal, sample_rate)
+     signal, sample_rate = torchaudio.load("data/a.wav")
+     signal1 = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(
+         signal
+     )
+     torchaudio.save("data/out.wav", signal1, 16000, encoding="PCM_S", bits_per_sample=16)
+     Audio = "data/out.wav"
+     speech, sample_rate = AudioReader.read_wav_file(Audio)
+     if signal == "none":
+         return "none", "none", "haha"
+     else:
+         segments = vad.segments_offline(speech)
+         text_results = ""
+         for part in segments:
+             _result = ASR_model.infer_offline(
+                 speech[part[0] * 16 : part[1] * 16], hot_words="任意热词 空格分开"
+             )
+             text_results += punc.punctuate(_result)[0]
+
+         out_prob, score, index, text_lab = classifier.classify_batch(signal1)
+         print(type(out_prob.squeeze(0).numpy()))
+         print(out_prob.squeeze(0).numpy())
+         print(type(text_lab[-1]))
+         print(text_lab[-1])
+         # return text_results, out_prob.squeeze(0).numpy(), text_lab[-1], Audio
+         # audio-side score from the first two emotion-class probabilities
+         prob = out_prob.squeeze(0).numpy()
+         print(prob)
+         score2 = 10 * prob[0] - 10 * prob[1]
+         print("score2", score2)
+         print(text_lab[-1])
+         # text-side score from the transcript; text_score4 returns three values,
+         # so the trailing gr.Column must be discarded (the original two-target
+         # unpacking would raise ValueError)
+         text, score1, _ = text_score4(text_results)
+         # text_emo = str(get_text_score(text_results))
+         print(text, score1)
+         score = score1 + score2
+
+         return text, score, gr.Column(visible=True)
+
+
+
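The combined result is score = score1 + score2. For example, a transcript scored 负向 with probability 0.9 contributes score1 = +9; if the first two entries of the emotion probability vector are 0.7 and 0.2, the audio side contributes score2 = 10*0.7 - 10*0.2 = +5, giving a combined score of 14. (Which emotion classes prob[0] and prob[1] denote depends on the label order of the loaded IEMOCAP classifier, which is not visible in this commit.)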
+ #####################################################################
+
  # constants
  schema = "情感倾向[正向,负向]"  # Define the schema for sentence-level sentiment classification
  # weights for the audio modality (定义音频模态下权重)