Spaces:
Running
Running
Gabriel Vidal-Ayrinhac
committed on
Commit
·
9d076e3
1
Parent(s):
50d4732
process audio in memory
Browse files
- src/hackathon/server/server.py +3 -15
- src/hackathon/speech/speech.py +7 -11
src/hackathon/server/server.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import os
|
2 |
from typing import Annotated, Dict, List
|
3 |
|
4 |
from dotenv import load_dotenv
|
@@ -22,11 +21,7 @@ from hackathon.server.schemas import (
|
|
22 |
StartRequest,
|
23 |
StartResponse,
|
24 |
)
|
25 |
-
from hackathon.speech.speech import
|
26 |
-
read_audio_config,
|
27 |
-
read_audio_file,
|
28 |
-
text_to_speech_file,
|
29 |
-
)
|
30 |
|
31 |
load_dotenv()
|
32 |
|
@@ -149,7 +144,7 @@ async def infer(
|
|
149 |
current_speaker.update_emotions(input_text)
|
150 |
msg = current_speaker.respond(input_text)
|
151 |
|
152 |
-
|
153 |
text=msg,
|
154 |
voice_id=current_audio_config["voice_id"],
|
155 |
stability=current_audio_config["stability"],
|
@@ -158,9 +153,6 @@ async def infer(
|
|
158 |
base_path=str(data_folder),
|
159 |
)
|
160 |
|
161 |
-
audio_signal = read_audio_file(audio_file_path) # base64
|
162 |
-
os.remove(audio_file_path)
|
163 |
-
|
164 |
return {
|
165 |
"generated_text": msg,
|
166 |
"anger": current_speaker.emotions["anger"],
|
@@ -247,7 +239,7 @@ async def cards(
|
|
247 |
|
248 |
data_folder = game_engine.data_folder
|
249 |
|
250 |
-
|
251 |
text=msg,
|
252 |
voice_id=current_audio_config["voice_id"],
|
253 |
stability=current_audio_config["stability"],
|
@@ -256,10 +248,6 @@ async def cards(
|
|
256 |
base_path=str(data_folder),
|
257 |
)
|
258 |
|
259 |
-
audio_signal = read_audio_file(audio_file_path) # base64
|
260 |
-
|
261 |
-
os.remove(audio_file_path)
|
262 |
-
|
263 |
return {"presenter_question": msg, "audio": audio_signal}
|
264 |
|
265 |
|
|
|
|
|
1 |
from typing import Annotated, Dict, List
|
2 |
|
3 |
from dotenv import load_dotenv
|
|
|
21 |
StartRequest,
|
22 |
StartResponse,
|
23 |
)
|
24 |
+
from hackathon.speech.speech import read_audio_config, text_to_speech_file
|
|
|
|
|
|
|
|
|
25 |
|
26 |
load_dotenv()
|
27 |
|
|
|
144 |
current_speaker.update_emotions(input_text)
|
145 |
msg = current_speaker.respond(input_text)
|
146 |
|
147 |
+
audio_signal = text_to_speech_file(
|
148 |
text=msg,
|
149 |
voice_id=current_audio_config["voice_id"],
|
150 |
stability=current_audio_config["stability"],
|
|
|
153 |
base_path=str(data_folder),
|
154 |
)
|
155 |
|
|
|
|
|
|
|
156 |
return {
|
157 |
"generated_text": msg,
|
158 |
"anger": current_speaker.emotions["anger"],
|
|
|
239 |
|
240 |
data_folder = game_engine.data_folder
|
241 |
|
242 |
+
audio_signal = text_to_speech_file(
|
243 |
text=msg,
|
244 |
voice_id=current_audio_config["voice_id"],
|
245 |
stability=current_audio_config["stability"],
|
|
|
248 |
base_path=str(data_folder),
|
249 |
)
|
250 |
|
|
|
|
|
|
|
|
|
251 |
return {"presenter_question": msg, "audio": audio_signal}
|
252 |
|
253 |
|
src/hackathon/speech/speech.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
# Code copied from https://elevenlabs.io/docs/cookbooks/text-to-speech/streaming
|
2 |
|
3 |
import base64
|
4 |
-
import uuid
|
5 |
from io import BytesIO
|
6 |
from typing import IO
|
7 |
|
@@ -57,16 +56,13 @@ def text_to_speech_file(
|
|
57 |
),
|
58 |
)
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
print(f"{save_file_path}: audio file successfully saved !")
|
68 |
-
|
69 |
-
return save_file_path
|
70 |
|
71 |
|
72 |
def text_to_speech_stream(
|
|
|
1 |
# Code copied from https://elevenlabs.io/docs/cookbooks/text-to-speech/streaming
|
2 |
|
3 |
import base64
|
|
|
4 |
from io import BytesIO
|
5 |
from typing import IO
|
6 |
|
|
|
56 |
),
|
57 |
)
|
58 |
|
59 |
+
audio_data = BytesIO()
|
60 |
+
for chunk in response:
|
61 |
+
if chunk:
|
62 |
+
audio_data.write(chunk)
|
63 |
+
audio_data.seek(0)
|
64 |
+
audio_base64 = base64.b64encode(audio_data.read()).decode("utf-8")
|
65 |
+
return audio_base64
|
|
|
|
|
|
|
66 |
|
67 |
|
68 |
def text_to_speech_stream(
|