File size: 4,420 Bytes
d0e0a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import asyncio
import os
import tempfile
import datetime
import base64

import aiofiles
from openai import BadRequestError
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from project.bot.models import AudioRecord
from project.config import settings
from project.database import get_user_db, get_async_session
from project.users.models import User


class VoiceRecord:
    """Base state for one voice recording: audio path, text, owner, DB session.

    Typical use is ``__init__`` -> ``await initialize()`` -> ``await save()``.
    ``initialize`` must run before ``save`` because ``save`` relies on
    ``session`` and ``current_user`` being populated.
    """

    # Absolute path of the audio file backing this record.
    file_path: str = None
    # Text recognised from the audio; empty until a transcription is stored.
    transcription: str = ''
    # User owning the record; filled in by ``initialize``.
    current_user: User = None
    # Database session used by ``initialize`` and ``save``.
    session: AsyncSession = None

    def __init__(self):
        """Build the target path ``<BASE_DIR>/project/records/voice_record_<ts>.mp3``."""
        # NOTE(review): the timestamp has one-second resolution, so two records
        # created within the same second share a path -- confirm this cannot
        # happen in practice before relying on unique file names.
        current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        file_name = f"voice_record_{current_time}.mp3"
        self.file_path = str(settings.BASE_DIR / 'project' / 'records' / file_name)

    async def initialize(self):
        """Open a database session and load the user that will own the record.

        The owner is the first row produced by ``select(User)``; it is ``None``
        when that query returns no rows.
        """
        self.session = await get_async_session()
        current_user = await self.session.execute(select(User))
        self.current_user = current_user.scalars().first()

    async def save(self):
        """Persist the recording as an ``AudioRecord`` and close the session.

        The record is placed in the folder returned by the current user's
        ``get_base_folder``. The session is closed whether or not the commit
        succeeds, so a failure does not leave it open; the original exception
        still propagates to the caller.
        """
        record = AudioRecord()
        record.audio_path = self.file_path
        record.transcription = self.transcription
        try:
            record.folder = await self.current_user.get_base_folder(session=self.session)
            self.session.add(record)
            await self.session.commit()
        finally:
            await self.session.close()


class RealTimeVoiceRecord(VoiceRecord):
    """Voice record fed incrementally with base64-encoded audio chunks.

    Chunks are pushed with ``add_to_queue`` and consumed one at a time by a
    background task running ``process_queue``. The text of the most recently
    processed chunk is exposed as ``latest_transcription`` and ``is_finished``
    becomes ``True`` once a chunk flagged as final has been processed.

    Instances must be created while an event loop is running, because the
    constructor schedules the consumer with ``asyncio.create_task``.
    """

    def __init__(self):
        super().__init__()
        self.queue = asyncio.Queue()
        self.latest_transcription = None
        self.is_finished = False
        # Keep a strong reference: the event loop only holds weak references to
        # tasks, so an unreferenced task may be garbage-collected mid-execution.
        self._queue_task = asyncio.create_task(self.process_queue())

    async def add_new_bytes_to_file(self, data_bytes: bytes):
        """Write ``data_bytes`` to ``file_path``, replacing any previous content."""
        async with aiofiles.open(self.file_path, 'wb') as f:
            await f.write(data_bytes)

    async def add_to_queue(self, data_bytes: str, is_finished: bool, full_audio: str):
        """Enqueue one chunk for the background consumer.

        Args:
            data_bytes: Base64-encoded audio chunk to transcribe.
            is_finished: Whether this is the last chunk of the recording.
            full_audio: Base64-encoded audio to store at ``file_path``; a falsy
                value leaves the stored file untouched.
        """
        await self.queue.put((data_bytes, is_finished, full_audio))

    async def process_queue(self):
        """Consume queued chunks forever, transcribing them in arrival order.

        An exception raised while handling a chunk propagates and ends this
        coroutine; the chunk is still marked done on the queue.
        """
        while True:
            data_bytes, is_finished, full_audio = await self.queue.get()
            try:
                await self.get_transcription(data_bytes, is_finished, full_audio)
            finally:
                # Always balance the successful get(), even when handling fails.
                self.queue.task_done()

    @staticmethod
    async def _transform_bytes_to_tmp_file(data_bytes: bytes) -> str:
        """Store ``data_bytes`` in a new temporary ``.mp3`` file and return its path.

        The file is not deleted automatically; the caller must remove it.
        """
        # Only the unique name is needed here. Closing the handle right away
        # avoids leaking a file descriptor for every processed chunk.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_filepath = temp_file.name
        async with aiofiles.open(temp_filepath, 'wb') as f:
            await f.write(data_bytes)
        return temp_filepath

    @staticmethod
    async def _transcript_audio(temp_filepath: str):
        """Transcribe the audio file at ``temp_filepath`` with ``whisper-1``.

        Returns:
            The recognised text, or an empty string when the API rejects the
            request with ``BadRequestError``.
        """
        with open(temp_filepath, 'rb') as file:
            try:
                transcript = await settings.OPENAI_CLIENT.audio.transcriptions.create(
                    model='whisper-1',
                    file=file,
                )
                text = transcript.text
            except BadRequestError:
                text = ''
        return text

    async def improve_transcription(self):
        """Transcribe the whole file at ``file_path`` and store it in ``transcription``."""
        with open(self.file_path, 'rb') as f:
            transcript = await settings.OPENAI_CLIENT.audio.transcriptions.create(
                model='whisper-1',
                file=f,
            )
            text = transcript.text
        self.transcription = text

    async def get_transcription(self, data_bytes: str, is_finished: bool, full_audio: str):
        """Transcribe one chunk and update ``latest_transcription``.

        Args:
            data_bytes: Base64-encoded audio chunk.
            is_finished: When true, ``is_finished`` is set on the instance after
                the chunk has been processed.
            full_audio: Base64-encoded audio written to ``file_path`` when truthy.
        """
        data_bytes = base64.b64decode(data_bytes)
        if full_audio:
            await self.add_new_bytes_to_file(base64.b64decode(full_audio))
        temp_filepath = await self._transform_bytes_to_tmp_file(data_bytes)
        try:
            chunk = await self._transcript_audio(temp_filepath)
        finally:
            # Remove the temp file even if transcription fails unexpectedly.
            os.remove(temp_filepath)
        print(chunk)
        self.latest_transcription = chunk
        if is_finished:
            print('finish')
            self.is_finished = True


class FileVoiceRecord(VoiceRecord):
    """Voice record built from a complete audio payload.

    Constructing an instance writes the payload to ``file_path`` and then
    stores the result of ``generate_transcription`` in ``transcription``.
    """

    def __init__(self, data_bytes: bytes):
        super().__init__()
        self.data_bytes = data_bytes
        self._transform_bytes_to_file()
        self.transcription = self.generate_transcription()

    def _transform_bytes_to_file(self):
        """Write the raw audio payload to ``file_path``."""
        with open(self.file_path, 'wb') as audio_file:
            audio_file.write(self.data_bytes)

    def generate_transcription(self):
        """Transcribe the file at ``file_path`` with ``whisper-1`` and return the text."""
        # NOTE(review): RealTimeVoiceRecord awaits this same client call. If
        # OPENAI_CLIENT is an async client, the un-awaited result here is a
        # coroutine without a ``.text`` attribute -- confirm which client type
        # is configured.
        with open(self.file_path, 'rb') as audio_file:
            transcript = settings.OPENAI_CLIENT.audio.transcriptions.create(
                model='whisper-1',
                file=audio_file
            )
        return transcript.text