File size: 2,270 Bytes
726ec90
c27e5a4
726ec90
 
 
 
 
2ec9baa
 
 
 
 
 
 
 
 
 
 
 
 
726ec90
 
a00c4d5
 
c27e5a4
 
a00c4d5
9254534
8d626cf
a00c4d5
726ec90
 
6831f1f
726ec90
a00c4d5
457d4b2
726ec90
9e4dc76
726ec90
8d626cf
726ec90
 
 
 
 
 
 
c27e5a4
726ec90
c27e5a4
 
a00c4d5
726ec90
 
 
9254534
c27e5a4
457d4b2
 
c27e5a4
457d4b2
3256e71
c27e5a4
726ec90
 
 
 
2ec9baa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import io
from typing import Tuple
import threading
from multiprocessing import Queue
from queue import Empty
from faster_whisper import WhisperModel

import logging
import sys

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Get a logger for your app
logger = logging.getLogger(__name__)


class AudioTranscriber(threading.Thread):
    """Daemon worker thread that transcribes audio chunks with faster-whisper.

    Pulls ``(audio_data, session_id)`` pairs off ``audio_queue``, runs them
    through a Whisper model, and pushes ``(text, session_id)`` results onto
    the output queue. Segments whose ``no_speech_prob`` exceeds the
    confidence threshold are forwarded as an empty string so downstream
    consumers still see one result per low-confidence segment.
    """

    def __init__(
        self,
        audio_queue: "Queue[Tuple[io.BytesIO, str]]",
        text_queue: "Queue[Tuple[str, str]]",
        language: str = "en",
        confidence_threshold: float = 0.5,
        device_index: int = 0,
    ):
        """Create the transcriber thread and eagerly load the model.

        Args:
            audio_queue: Input queue of (audio buffer, session id) pairs.
            text_queue: Output queue receiving (text, session id) pairs.
            language: Language code passed to Whisper (default "en").
            confidence_threshold: Segments with ``no_speech_prob`` above
                this value are treated as non-speech and emitted as "".
            device_index: CUDA device index for model placement.
        """
        super().__init__()
        self.audio_queue = audio_queue
        self.action_queue = text_queue
        self.daemon = True  # Thread will exit when main program exits
        self.language = language
        self.confidence_threshold = confidence_threshold
        # NOTE(review): the (large) model is loaded in the caller's thread,
        # not in run() — constructing this object blocks until load completes.
        self.transcriber = WhisperModel(
            "large",
            device="cuda",
            device_index=device_index,
            compute_type="int8",
        )

    def run(self):
        """Consume the audio queue forever, emitting transcriptions.

        Never returns; relies on the daemon flag for shutdown. Errors in
        transcription are logged (with traceback) and the loop continues.
        """
        while True:
            # Keep the try around the queue read minimal: Empty is expected
            # control flow (1s poll timeout), not an error.
            try:
                audio_data, session_id = self.audio_queue.get(timeout=1)
            except Empty:
                continue  # Nothing queued yet; poll again.

            try:
                segments, _ = self.transcriber.transcribe(
                    audio_data, language=self.language
                )

                # Put the transcription results in the output queue
                for segment in segments:
                    if segment.no_speech_prob <= self.confidence_threshold:
                        self.action_queue.put((segment.text, session_id))
                        # Lazy %-style args: no formatting cost when the
                        # INFO level is disabled.
                        logger.info(
                            "[Thread %s] [%.2fs -> %.2fs] %s",
                            threading.get_ident(),
                            segment.start,
                            segment.end,
                            segment.text,
                        )
                    else:
                        # Likely non-speech: emit an empty result so the
                        # consumer still gets one message per segment.
                        self.action_queue.put(("", session_id))
            except Exception:
                # logger.exception preserves the traceback, unlike
                # logger.error(f"... {e}").
                logger.exception("Error processing audio chunk")