File size: 2,040 Bytes
bbbf06e c0e541b bbbf06e 25df8a0 bbbf06e c0e541b bbbf06e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import pprint
import unittest
import numpy as np
import sys
sys.path.append('../whisper-webui')
from src.vad import AbstractTranscription, TranscriptionConfig, VadSileroTranscription
class TestVad(unittest.TestCase):
    """Exercise ``transcribe`` on ``MockVadTranscription`` with a dummy callback."""

    def setUp(self):
        """Give every test method a fresh, empty record of callback inputs."""
        # Per-test state belongs in setUp rather than in an overridden
        # TestCase.__init__: the runner calls setUp before each test method.
        self.transcribe_calls = []

    def test_transcript(self):
        """Check the per-segment callback inputs and the merged segment list.

        The mock reports two speech ranges (30-60 and 100-200) and encodes
        each requested audio slice as ``[start, duration]``, so the callback
        is expected to receive ``[30, 30]`` and ``[100, 100]``.  The expected
        segments equal the callback's dummy 10-20 segment shifted by each
        range's start (30 and 100) -- presumably done by the base class's
        ``transcribe``; verify against ``src.vad``.
        """
        mock = MockVadTranscription()

        # The bound method is passed directly; no wrapping lambda is needed.
        result = mock.transcribe("mock", self.transcribe_segments)

        self.assertListEqual(self.transcribe_calls, [
            [30, 30],
            [100, 100],
        ])
        self.assertListEqual(result['segments'], [
            {'end': 50.0, 'start': 40.0, 'text': 'Hello world '},
            {'end': 120.0, 'start': 110.0, 'text': 'Hello world '},
        ])

    def transcribe_segments(self, segment):
        """Record the received segment and return a fixed transcription result.

        Args:
            segment: Object exposing ``tolist()`` (the mock supplies a NumPy
                array); its list form is appended to ``self.transcribe_calls``.

        Returns:
            A dict with a dummy ``text``, one dummy segment spanning 10.0 to
            20.0, and an empty ``language``.
        """
        self.transcribe_calls.append(segment.tolist())

        # Dummy text
        return {
            'text': "Hello world ",
            'segments': [
                {
                    "start": 10.0,
                    "end": 20.0,
                    "text": "Hello world "
                }
            ],
            'language': ""
        }
class MockVadTranscription(AbstractTranscription):
    """Test double that serves canned audio slices and speech timestamps."""

    def __init__(self):
        super().__init__()

    def get_audio_segment(self, str, start_time: str = None, duration: str = None):
        """Return ``[start, duration]`` as a float64 array in place of audio.

        ``start_time`` and ``duration`` are strings that may end in ``"s"``
        (for example ``"30s"``); the suffix is dropped before conversion to
        float.  The first positional argument is ignored.
        """
        # For mocking, the "audio" is simply the two parsed numbers, which
        # lets a test read back which slice was requested.
        offset_seconds, length_seconds = (
            float(value.removesuffix("s")) for value in (start_time, duration)
        )
        return np.array([offset_seconds, length_seconds], dtype=np.float64)

    def get_transcribe_timestamps(self, audio: str, config: TranscriptionConfig, start_time: float, duration: float):
        """Return two fixed ranges (30-60 and 100-200), ignoring all arguments."""
        return [
            {'start': 30, 'end': 60},
            {'start': 100, 'end': 200},
        ]
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()