salomonsky committed on
Commit 8307fd0 (verified)
1 Parent(s): f03e9a3

Update app.py

Files changed (1)
  1. app.py  +32 -97
app.py CHANGED
@@ -1,81 +1,13 @@
  import streamlit as st
  from huggingface_hub import InferenceClient
+ from gtts import gTTS
  import base64
  from pydub import AudioSegment
- from io import BytesIO
- from gtts import gTTS
- from streamlit_webrtc import webrtc_streamer, WebRtcMode
- import speech_recognition as sr
- import sounddevice as sd
+ from pydub.playback import play

  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
- pre_prompt = ""
+ pre_prompt = "tu nombre es Chaman 3.0 una IA conducual, tus principios son el trashumanísmo ecológico."
  pre_prompt_sent = False
- webrtc_ctx = None
-
- def take_user_input():
-     r = sr.Recognizer()
-
-     def audio_callback(in_data, frame_count, time_info, status):
-         global webrtc_ctx
-         audio = sr.AudioData(
-             in_data.tobytes(),
-             sample_rate=webrtc_ctx.audio_sample_rate,
-             sample_width=sd.default.dtype.itemsize
-         )
-
-         st.info('Reconociendo...')
-         query = transcribe_speech(audio)
-
-         if 'salir' in query or 'detener' in query:
-             speak("Hasta luego.")
-             exit()
-         return query
-
-     global webrtc_ctx
-     webrtc_ctx = webrtc_streamer(
-         key="microphone",
-         mode=WebRtcMode.SENDRECV,
-         audio_receiver=audio_callback,
-         async_processing=True,
-     )
-
-     if not webrtc_ctx:
-         st.warning("Por favor, habilita el micrófono.")
-         return 'None'
-
-     st.info('Escuchando...')
-
-     try:
-         with sd.InputStream(callback=lambda indata, frames, time, status: None):
-             while True:
-                 audio_data = webrtc_ctx.audio_receiver_stream.get()
-                 if audio_data:
-                     audio = sr.AudioData(
-                         audio_data.tobytes(),
-                         sample_rate=webrtc_ctx.audio_sample_rate,
-                         sample_width=audio_data.itemsize
-                     )
-
-                     st.info('Reconociendo...')
-
-                     query = transcribe_speech(audio)
-
-                     if 'salir' in query or 'detener' in query:
-                         speak("Hasta luego.")
-                         exit()
-                     return query
-
-     except sr.UnknownValueError:
-         speak('No se ha reconocido nada. Intenta de nuevo...')
-     except sr.RequestError as e:
-         st.error(f"Error en la solicitud al reconocimiento de voz: {e}")
-
-     return 'None'
-
-
- def audio_callback(in_data, frame_count, time_info, status):
-     return in_data, webrtc_ctx.audio_sample_rate

  def format_prompt(message, history):
      global pre_prompt_sent
@@ -91,13 +23,24 @@ def format_prompt(message, history):

      prompt += f"[INST] {message} [/INST]"
      return prompt
-
+
+ def text_to_speech(text, speed=1.3):
+     tts = gTTS(text=text, lang='es')
+     audio_file_path = 'output.mp3'
+     tts.save(audio_file_path)
+     sound = AudioSegment.from_mp3(audio_file_path)
+     sound = sound.speedup(playback_speed=speed)
+     sound.export(audio_file_path, format="mp3")
+
+     return audio_file_path
+
  def generate(user_input, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
      global pre_prompt_sent
      temperature = float(temperature) if temperature is not None else 0.9
      if temperature < 1e-2:
          temperature = 1e-2
      top_p = float(top_p)
+
      generate_kwargs = dict(
          temperature=temperature,
          max_new_tokens=max_new_tokens,
@@ -108,38 +51,30 @@ def generate(user_input, history, temperature=None, max_new_tokens=512, top_p=0.
      )

      formatted_prompt = format_prompt(user_input, history)
+     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
+     response = ""
+
+     for response_token in stream:
+         response += response_token.token.text

-     try:
-         stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
-         response = ""
-         for response_token in stream:
-             response += response_token.token.text
-         response = ' '.join(response.split()).replace('</s>', '')
-         audio_bytes = text_to_speech(response)
-         return response, audio_bytes
-     except Exception as e:
-         return str(e), None
-
- def text_to_speech(text):
-     tts = gTTS(text=text, lang='es')
-     audio_stream = BytesIO()
-     tts.save(audio_stream)
-     audio_stream.seek(0)
-     return audio_stream.read()
+     response = ' '.join(response.split()).replace('</s>', '')
+
+     audio_file_path = text_to_speech(response, speed=1.3)
+     audio_file = open(audio_file_path, 'rb')
+     audio_bytes = audio_file.read()

- def speak(text):
-     audio_bytes = text_to_speech(text)
-     st.audio(audio_bytes, format="audio/mp3", start_time=0, key="audio_player")
+     return response, audio_bytes

  if "history" not in st.session_state:
      st.session_state.history = []

- user_input = take_user_input()
- output, audio_bytes = generate(user_input, history=st.session_state.history)
+ with st.container():
+     user_input = st.text_input(label="Usuario", value="")
+     output, audio_bytes = generate(user_input, history=st.session_state.history)
+     st.text_area("Respuesta", height=400, value=output, key="output_text", disabled=True)

- with st.container(width=900, height=400):
-     user_input_container = st.text_input("Tu entrada de usuario", value=user_input)
-     st.text_area("Respuesta", value=output, key="output_text", disabled=True)
+ if user_input:
+     st.session_state.history.append((user_input, output))

  if audio_bytes is not None:
      st.markdown(