Spaces:

brestok
/

real-estate

Running

App Files Files Community

real-estate / project /bot /records.py

brestok

init

d0e0a14 2 months ago

raw

history blame contribute delete

4.42 kB

	import asyncio
	import os
	import tempfile
	import datetime
	import base64

	import aiofiles
	from openai import BadRequestError
	from sqlalchemy import select
	from sqlalchemy.ext.asyncio import AsyncSession

	from project.bot.models import AudioRecord
	from project.config import settings
	from project.database import get_user_db, get_async_session
	from project.users.models import User


	class VoiceRecord:
	    """Base class for a voice recording that is stored on disk and in the DB.

	    Lifecycle as used by this class: construct the object (which only picks a
	    target file path), ``await initialize()`` to obtain a DB session and a
	    user, fill ``transcription``, then ``await save()`` to persist an
	    ``AudioRecord`` row.
	    """

	    # Class-level defaults; real values are assigned per instance in
	    # ``__init__`` / ``initialize``.
	    file_path: str = None
	    transcription: str = ''
	    current_user: User = None
	    session: AsyncSession = None

	    def __init__(self):
	        """Choose the path of the ``.mp3`` file that will hold this recording.

	        NOTE: the file name only has one-second resolution, so two records
	        created within the same second get the same ``file_path``.
	        """
	        current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
	        file_name = f"voice_record_{current_time}.mp3"
	        self.file_path = str(settings.BASE_DIR / 'project' / 'records' / file_name)

	    async def initialize(self):
	        """Open a DB session and load the user this record will belong to.

	        The user is the first row returned by an unfiltered ``select(User)``;
	        ``current_user`` stays ``None`` when the query returns no rows.
	        """
	        self.session = await get_async_session()
	        result = await self.session.execute(select(User))
	        self.current_user = result.scalars().first()

	    async def save(self):
	        """Persist this recording as an ``AudioRecord`` and close the session.

	        ``initialize()`` must have been awaited first. The session is closed
	        whether or not the commit succeeds, so a failing lookup or commit no
	        longer leaves the session (and its connection) open.
	        """
	        try:
	            record = AudioRecord()
	            record.audio_path = self.file_path
	            record.transcription = self.transcription
	            record.folder = await self.current_user.get_base_folder(session=self.session)
	            self.session.add(record)
	            await self.session.commit()
	        finally:
	            # Originally only reached on success; always release the session.
	            await self.session.close()


	class RealTimeVoiceRecord(VoiceRecord):
	    """Voice record fed incrementally with base64-encoded audio chunks.

	    Chunks are pushed with ``add_to_queue`` and consumed one at a time by a
	    background task started in ``__init__``. Each chunk is transcribed on its
	    own and the result is exposed as ``latest_transcription``.

	    Must be instantiated while an event loop is running, because the
	    constructor calls ``asyncio.create_task``.
	    """

	    def __init__(self):
	        super().__init__()
	        self.queue = asyncio.Queue()
	        self.latest_transcription = None
	        self.is_finished = False
	        # Keep a strong reference: the event loop only holds weak references
	        # to tasks, so an unreferenced task may be garbage-collected mid-run.
	        self._worker_task = asyncio.create_task(self.process_queue())

	    async def add_new_bytes_to_file(self, data_bytes: bytes):
	        """Write ``data_bytes`` to ``self.file_path``, replacing its content.

	        The file is opened in ``'wb'`` mode, so despite the method name the
	        previous content is overwritten rather than appended to.
	        """
	        async with aiofiles.open(self.file_path, 'wb') as f:
	            await f.write(data_bytes)

	    async def add_to_queue(self, data_bytes: str, is_finished: bool, full_audio: str):
	        """Enqueue one chunk for the background worker.

	        :param data_bytes: base64-encoded audio chunk to transcribe.
	        :param is_finished: whether this is the last chunk of the recording.
	        :param full_audio: base64-encoded audio to store in the record file;
	            a falsy value skips the file write.
	        """
	        await self.queue.put((data_bytes, is_finished, full_audio))

	    async def process_queue(self):
	        """Consume queued chunks forever, transcribing them in arrival order.

	        An exception raised while handling a chunk still propagates and ends
	        the worker, but the item is always marked done so that a caller
	        awaiting ``queue.join()`` is not blocked by that item.
	        """
	        while True:
	            data_bytes, is_finished, full_audio = await self.queue.get()
	            try:
	                await self.get_transcription(data_bytes, is_finished, full_audio)
	            finally:
	                self.queue.task_done()

	    @staticmethod
	    async def _transform_bytes_to_tmp_file(data_bytes: bytes) -> str:
	        """Write ``data_bytes`` to a new temporary ``.mp3`` file.

	        :return: path of the temporary file; the caller must delete it.
	        """
	        # Only the unique path is needed here. Close the handle right away so
	        # its file descriptor is not leaked; ``delete=False`` keeps the file.
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
	            temp_path = temp_file.name
	        async with aiofiles.open(temp_path, 'wb') as f:
	            await f.write(data_bytes)
	        return temp_path

	    @staticmethod
	    async def _transcript_audio(temp_filepath: str):
	        """Transcribe the audio file at ``temp_filepath`` with ``whisper-1``.

	        :return: the transcribed text, or ``''`` when the API rejects the
	            request with ``BadRequestError``.
	        """
	        with open(temp_filepath, 'rb') as file:
	            try:
	                transcript = await settings.OPENAI_CLIENT.audio.transcriptions.create(
	                    model='whisper-1',
	                    file=file,
	                )
	                text = transcript.text
	            except BadRequestError:
	                text = ''
	        return text

	    async def improve_transcription(self):
	        """Transcribe the whole record file and store it in ``transcription``.

	        Unlike ``_transcript_audio``, API errors are not caught here.
	        """
	        with open(self.file_path, 'rb') as f:
	            transcript = await settings.OPENAI_CLIENT.audio.transcriptions.create(
	                model='whisper-1',
	                file=f,
	            )
	        self.transcription = transcript.text

	    async def get_transcription(self, data_bytes: str, is_finished: bool, full_audio: str):
	        """Transcribe one chunk and update the record's state.

	        :param data_bytes: base64-encoded audio chunk.
	        :param is_finished: when true, ``self.is_finished`` is set afterwards.
	        :param full_audio: base64-encoded audio written to the record file
	            when truthy.
	        """
	        data_bytes = base64.b64decode(data_bytes)
	        if full_audio:
	            await self.add_new_bytes_to_file(base64.b64decode(full_audio))
	        temp_filepath = await self._transform_bytes_to_tmp_file(data_bytes)
	        try:
	            chunk = await self._transcript_audio(temp_filepath)
	            print(chunk)
	        finally:
	            # Remove the temp file even when transcription fails, so failed
	            # chunks do not pile up in the temp directory.
	            os.remove(temp_filepath)
	        self.latest_transcription = chunk
	        if is_finished:
	            print('finish')
	            self.is_finished = True


	class FileVoiceRecord(VoiceRecord):
	    """Voice record built from a complete audio payload.

	    Construction is eager: the bytes are written to ``file_path`` and the
	    transcription is requested before ``__init__`` returns.
	    """

	    def __init__(self, data_bytes: bytes):
	        """Store ``data_bytes`` on disk and transcribe them straight away."""
	        super().__init__()
	        self.data_bytes = data_bytes
	        self._transform_bytes_to_file()
	        self.transcription = self.generate_transcription()

	    def _transform_bytes_to_file(self):
	        """Dump the raw audio bytes into the record file (binary write)."""
	        with open(self.file_path, 'wb') as audio_out:
	            audio_out.write(self.data_bytes)

	    def generate_transcription(self):
	        """Send the record file to ``whisper-1`` and return the text.

	        NOTE(review): this call is synchronous, while the same client method
	        is awaited in ``RealTimeVoiceRecord``. If ``settings.OPENAI_CLIENT``
	        is an async client, ``create`` returns a coroutine here and reading
	        ``.text`` fails - confirm which client type is configured.
	        """
	        with open(self.file_path, 'rb') as audio_in:
	            response = settings.OPENAI_CLIENT.audio.transcriptions.create(
	                model='whisper-1',
	                file=audio_in
	            )
	            return response.text