Gabriel Vidal-Ayrinhac committed on
Commit
9d076e3
·
1 Parent(s): 50d4732

process audio in memory

Browse files
src/hackathon/server/server.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  from typing import Annotated, Dict, List
3
 
4
  from dotenv import load_dotenv
@@ -22,11 +21,7 @@ from hackathon.server.schemas import (
22
  StartRequest,
23
  StartResponse,
24
  )
25
- from hackathon.speech.speech import (
26
- read_audio_config,
27
- read_audio_file,
28
- text_to_speech_file,
29
- )
30
 
31
  load_dotenv()
32
 
@@ -149,7 +144,7 @@ async def infer(
149
  current_speaker.update_emotions(input_text)
150
  msg = current_speaker.respond(input_text)
151
 
152
- audio_file_path = text_to_speech_file(
153
  text=msg,
154
  voice_id=current_audio_config["voice_id"],
155
  stability=current_audio_config["stability"],
@@ -158,9 +153,6 @@ async def infer(
158
  base_path=str(data_folder),
159
  )
160
 
161
- audio_signal = read_audio_file(audio_file_path) # base64
162
- os.remove(audio_file_path)
163
-
164
  return {
165
  "generated_text": msg,
166
  "anger": current_speaker.emotions["anger"],
@@ -247,7 +239,7 @@ async def cards(
247
 
248
  data_folder = game_engine.data_folder
249
 
250
- audio_file_path = text_to_speech_file(
251
  text=msg,
252
  voice_id=current_audio_config["voice_id"],
253
  stability=current_audio_config["stability"],
@@ -256,10 +248,6 @@ async def cards(
256
  base_path=str(data_folder),
257
  )
258
 
259
- audio_signal = read_audio_file(audio_file_path) # base64
260
-
261
- os.remove(audio_file_path)
262
-
263
  return {"presenter_question": msg, "audio": audio_signal}
264
 
265
 
 
 
1
  from typing import Annotated, Dict, List
2
 
3
  from dotenv import load_dotenv
 
21
  StartRequest,
22
  StartResponse,
23
  )
24
+ from hackathon.speech.speech import read_audio_config, text_to_speech_file
 
 
 
 
25
 
26
  load_dotenv()
27
 
 
144
  current_speaker.update_emotions(input_text)
145
  msg = current_speaker.respond(input_text)
146
 
147
+ audio_signal = text_to_speech_file(
148
  text=msg,
149
  voice_id=current_audio_config["voice_id"],
150
  stability=current_audio_config["stability"],
 
153
  base_path=str(data_folder),
154
  )
155
 
 
 
 
156
  return {
157
  "generated_text": msg,
158
  "anger": current_speaker.emotions["anger"],
 
239
 
240
  data_folder = game_engine.data_folder
241
 
242
+ audio_signal = text_to_speech_file(
243
  text=msg,
244
  voice_id=current_audio_config["voice_id"],
245
  stability=current_audio_config["stability"],
 
248
  base_path=str(data_folder),
249
  )
250
 
 
 
 
 
251
  return {"presenter_question": msg, "audio": audio_signal}
252
 
253
 
src/hackathon/speech/speech.py CHANGED
@@ -1,7 +1,6 @@
1
  # Code copied from https://elevenlabs.io/docs/cookbooks/text-to-speech/streaming
2
 
3
  import base64
4
- import uuid
5
  from io import BytesIO
6
  from typing import IO
7
 
@@ -57,16 +56,13 @@ def text_to_speech_file(
57
  ),
58
  )
59
 
60
- save_file_path = f"./{uuid.uuid4()}.mp3"
61
-
62
- with open(save_file_path, "wb") as f:
63
- for chunk in response:
64
- if chunk:
65
- f.write(chunk)
66
-
67
- print(f"{save_file_path}: audio file successfully saved !")
68
-
69
- return save_file_path
70
 
71
 
72
  def text_to_speech_stream(
 
1
  # Code copied from https://elevenlabs.io/docs/cookbooks/text-to-speech/streaming
2
 
3
  import base64
 
4
  from io import BytesIO
5
  from typing import IO
6
 
 
56
  ),
57
  )
58
 
59
+ audio_data = BytesIO()
60
+ for chunk in response:
61
+ if chunk:
62
+ audio_data.write(chunk)
63
+ audio_data.seek(0)
64
+ audio_base64 = base64.b64encode(audio_data.read()).decode("utf-8")
65
+ return audio_base64
 
 
 
66
 
67
 
68
  def text_to_speech_stream(