import os

import dotenv
import gradio as gr
from openai import OpenAI
from transformers import pipeline

# Load OPENROUTER_API_KEY from a local .env file (if present).
dotenv.load_dotenv()
api_key = os.getenv("OPENROUTER_API_KEY")

# OpenRouter exposes an OpenAI-compatible API, so the standard OpenAI client
# works once its base URL points at openrouter.ai.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=api_key,
)

# Speech-to-text pipeline (English acoustic model).
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")


def speech_to_text(speech):
    """Transcribe an audio file path and return the lowercased text."""
    text = asr(speech)["text"]
    return text.lower()


def chatbot(message, history):
    """Send the message (plus any prior turns) to the LLM and return its reply."""
    messages = []
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model="meta-llama/llama-3.1-405b-instruct:free",
        messages=messages,
        temperature=0.7,
        max_tokens=1000,
    )
    return response.choices[0].message.content


with gr.Blocks() as demo:
    with gr.Row():
        audio_file = gr.Audio(type="filepath", label="Upload your audio file")
        text_input = gr.Textbox(
            label="Transcribed text (or type your message)",
            placeholder="The text will appear here...",
        )
        text_output = gr.Textbox(
            label="Chatbot response",
            placeholder="The chatbot's response will appear here...",
            interactive=False,
        )
    b1 = gr.Button("Transcribe audio and ask the chatbot")

    def process_audio_and_chat(audio_input, typed_text):
        # Prefer the uploaded audio; fall back to whatever was typed.
        if audio_input:
            user_text = speech_to_text(audio_input)
        elif typed_text:
            user_text = typed_text
        else:
            return "", "Please upload an audio file or type a message."
        response = chatbot(user_text, [])  # single turn: no stored history
        return user_text, response

    b1.click(
        process_audio_and_chat,
        inputs=[audio_file, text_input],
        outputs=[text_input, text_output],
    )

demo.launch(debug=True)