# -*- coding:utf-8 -*-
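"""Minimal offline ASR demo: record microphone audio with sounddevice, write it
to out.wav, then transcribe it with ParaformerOffline, using FSMNVad for speech
segmentation and CttPunctuator for punctuation restoration."""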
import logging
import time

import numpy as np
import sounddevice as sd
import soundfile as sf

from paraformer import AudioReader, CttPunctuator, FSMNVad, ParaformerOffline

logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s %(levelname)s] [%(filename)s:%(lineno)d %(module)s.%(funcName)s] %(message)s",
)
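# Shared recording state: the stream callback appends each captured block here.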
recorded_audio = []
sample_rate = 16000

def luyin():
    """Record microphone audio until the user stops, then write it to out.wav."""

    def callback(indata, frames, time_info, status):
        if status:
            print("Recording error:", status)
        if recording:
            # Copy the block: sounddevice reuses the indata buffer after the
            # callback returns, so the data must be duplicated to be kept.
            recorded_audio.append(indata.copy())

    a = int(input("Enter 1 to start recording: "))
    if a == 1:
        recording = True
        stream = sd.InputStream(
            callback=callback, channels=1, samplerate=sample_rate, blocksize=4096
        )
        stream.start()
        begin = time.time()
        b = int(input("Enter 2 to stop recording: "))
        if b == 2:
            recording = False
            print("Stop recording")
            stream.stop()
            stream.close()
            print("Recording time: %ds" % (time.time() - begin))
            if len(recorded_audio) == 0:
                return None
            # Stack the per-block arrays into one (n_samples, 1) signal and save it.
            signal = np.vstack(recorded_audio)
            sf.write("out.wav", signal, sample_rate)
            return signal
    return None



if __name__ == "__main__":
    logging.info("Testing offline ASR")
    if luyin() is None:
        raise SystemExit("No audio was recorded.")
    audio = "out.wav"
    speech, sample_rate = AudioReader.read_wav_file(audio)
    model = ParaformerOffline()
    vad = FSMNVad()
    punc = CttPunctuator()

    segments = vad.segments_offline(speech)
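    # Each VAD segment appears to be (start_ms, end_ms); at 16 kHz that is
    # 16 samples per millisecond, hence the *16 when slicing the waveform below.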
    results = ""
    for part in segments:
        # hot_words biases decoding toward the given words; the literal below is
        # the upstream placeholder meaning "any hot words, space separated".
        _result = model.infer_offline(
            speech[part[0] * 16 : part[1] * 16], hot_words="任意热词 空格分开"
        )
        results += punc.punctuate(_result)[0]
    logging.info(results)