Spaces:
Runtime error
Runtime error
Update consult.py
Browse files- consult.py +126 -0
consult.py
CHANGED
@@ -36,6 +36,132 @@ os.environ["no_proxy"] = "localhost,127.0.0.1,::1"
|
|
36 |
# Gradio client for the hosted "Liusuthu/TextDepression" Space (remote text-scoring API).
client = Client("Liusuthu/TextDepression")
|
37 |
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# constants
schema = "情感倾向[正向,负向]"  # Define the schema for sentence-level sentiment classification
# Weights for the audio modality are defined below (translated from: 定义音频模态下权重)
|
|
|
36 |
# --- Model and service setup (runs once at import time) ---

# Remote Gradio Space that scores text for depression indicators.
client = Client("Liusuthu/TextDepression")

# Local SpeechBrain wav2vec2 emotion classifier (IEMOCAP), loaded through a
# custom interface class shipped next to the pretrained checkpoint.
classifier = foreign_class(
    source="pretrained_models/local-speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
    savedir="pretrained_models/local-speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
)

# Offline speech pipeline: ASR recognizer, voice-activity detector, punctuator.
ASR_model = ParaformerOffline()
vad = FSMNVad()
punc = CttPunctuator()
def text_api(text: str):
    """Send *text* to the hosted TextDepression Space and return its raw reply.

    The reply is whatever the Space's ``/predict`` endpoint produces —
    downstream parsers treat it as a string containing label and probability.
    """
    return client.predict(
        text,  # str in '输入文字' Textbox component
        api_name="/predict",
    )
|
55 |
+
|
56 |
+
|
57 |
+
def get_text_score(text):
    """Query the remote text classifier and parse its reply.

    The Space returns a stringified payload containing a ``text`` field (the
    sentiment label) and a ``probability`` field; both are recovered by
    fixed-offset slicing around those literal key names — brittle, but kept
    as-is to match the remote format.  # NOTE(review): offsets assume the
    reply layout never changes — verify against the Space's output.

    Returns:
        (label, prob): label is a 2-character sentiment string
        (正向/负向), prob a float confidence.
    """
    reply = text_api(text)
    # Slice out the two label characters that follow the "text" key.
    label = reply.partition("text")[2][4:6]
    # Slice out the numeric confidence that follows the "probability" key.
    prob = float(reply.partition("probability")[2][3:-4])
    return label, prob
|
66 |
+
|
67 |
+
def classify_continuous(audio):
    """Transcribe a recording and classify its speech emotion.

    Args:
        audio: ``(sample_rate, ndarray)`` pair as produced by a Gradio audio
            input, or ``None`` when nothing was recorded.

    Returns:
        ``(transcript, class_probabilities, predicted_label, wav_path)`` on
        success, or ``("none", "none", "haha")`` when no audio was supplied.
    """
    print(type(audio))
    print(audio)
    if audio is None:
        # Nothing recorded. The original tested `signal == "none"` AFTER
        # loading a tensor from disk — that comparison can never be true and
        # evaluating a tensor's truthiness raises at runtime; guard the raw
        # input before any processing instead.
        return "none", "none", "haha"

    sample_rate, signal = audio  # the speech input
    signal = signal.astype(np.float32)
    signal /= np.max(np.abs(signal))  # peak-normalize to [-1, 1]
    sf.write("data/a.wav", signal, sample_rate)

    # Reload and resample to the 16 kHz rate the downstream models expect.
    signal, sample_rate = torchaudio.load("data/a.wav")
    signal1 = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(
        signal
    )
    torchaudio.save("data/out.wav", signal1, 16000, encoding="PCM_S", bits_per_sample=16)
    audio_path = "data/out.wav"
    speech, sample_rate = AudioReader.read_wav_file(audio_path)

    # VAD segments the speech; each voiced chunk is transcribed and punctuated.
    segments = vad.segments_offline(speech)
    text_results = ""
    for part in segments:
        _result = ASR_model.infer_offline(
            speech[part[0] * 16 : part[1] * 16], hot_words="任意热词 空格分开"
        )
        text_results += punc.punctuate(_result)[0]

    out_prob, score, index, text_lab = classifier.classify_batch(signal1)
    print(type(out_prob.squeeze(0).numpy()))
    print(out_prob.squeeze(0).numpy())
    print(type(text_lab[-1]))
    print(text_lab[-1])
    return text_results, out_prob.squeeze(0).numpy(), text_lab[-1], audio_path
|
98 |
+
|
99 |
+
|
100 |
+
|
101 |
+
#######################################################################
|
102 |
+
#第四题专用函数:
|
103 |
+
def text_score4(text):
    """Score free text for question 4 from its remote sentiment classification.

    Reuses :func:`get_text_score` (identical parsing of the Space's reply)
    instead of duplicating the fixed-offset slicing inline.

    Returns:
        ``(text, score, gr.Column(visible=True))`` where score is in
        roughly [-10, 10]: positive sentiment lowers it, negative raises it.
    """
    label, prob = get_text_score(text)
    # Positive sentiment (正向) contributes negatively to the depression score.
    score = -prob * 10 if label == "正向" else prob * 10
    return text, score, gr.Column(visible=True)
|
116 |
+
|
117 |
+
|
118 |
+
def speech_score4(audio):
    """Score a question-4 answer by fusing speech emotion and text sentiment.

    Args:
        audio: ``(sample_rate, ndarray)`` pair from a Gradio audio input,
            or ``None`` when nothing was recorded.

    Returns:
        ``(transcript, combined_score, gr.Column(visible=True))`` on success,
        or ``("none", "none", "haha")`` when no audio was supplied.
    """
    print(type(audio))
    print(audio)
    if audio is None:
        # No recording: the original compared the loaded tensor to the string
        # "none" after processing, which can never match; guard the input here.
        return "none", "none", "haha"

    sample_rate, signal = audio  # the speech input
    signal = signal.astype(np.float32)
    signal /= np.max(np.abs(signal))  # peak-normalize to [-1, 1]
    sf.write("data/a.wav", signal, sample_rate)

    # Reload and resample to 16 kHz for the downstream models.
    signal, sample_rate = torchaudio.load("data/a.wav")
    signal1 = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(
        signal
    )
    torchaudio.save("data/out.wav", signal1, 16000, encoding="PCM_S", bits_per_sample=16)
    audio_path = "data/out.wav"
    speech, sample_rate = AudioReader.read_wav_file(audio_path)

    # VAD segments the speech; each voiced chunk is transcribed and punctuated.
    segments = vad.segments_offline(speech)
    text_results = ""
    for part in segments:
        _result = ASR_model.infer_offline(
            speech[part[0] * 16 : part[1] * 16], hot_words="任意热词 空格分开"
        )
        text_results += punc.punctuate(_result)[0]

    out_prob, score, index, text_lab = classifier.classify_batch(signal1)
    print(type(out_prob.squeeze(0).numpy()))
    print(out_prob.squeeze(0).numpy())
    print(type(text_lab[-1]))
    print(text_lab[-1])

    # Speech-emotion score: spread between the first two class probabilities.
    prob = out_prob.squeeze(0).numpy()
    print(prob)
    score2 = 10 * prob[0] - 10 * prob[1]
    print("score2", score2)
    print(text_lab[-1])

    # Text-sentiment score from the transcript. text_score4 returns a third
    # value (a Gradio component) which is discarded here — the original
    # two-target unpack of that 3-tuple raised ValueError at runtime.
    text, score1, _ = text_score4(text_results)
    print(text, score1)
    score = score1 + score2

    return text, score, gr.Column(visible=True)
|
160 |
+
|
161 |
+
|
162 |
+
|
163 |
+
#####################################################################
|
164 |
+
|
165 |
# constants
schema = "情感倾向[正向,负向]"  # Define the schema for sentence-level sentiment classification
# Weights for the audio modality are defined below (translated from: 定义音频模态下权重)
|