"""Gradio app that transcribes Spanish audio with a wav2vec2 ASR pipeline.

The uploaded file is split into fixed-length WAV chunks, each chunk is
transcribed separately, and the partial transcripts are joined with spaces.
"""

import os
import tempfile
from typing import List, Optional

import gradio as gr
from pydub import AudioSegment
from pydub.utils import make_chunks
from transformers import pipeline

# ASR model, created once at import time and shared by every request.
modelo = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-spanish",
)


def dividir_audio(input_path, output_dir, chunk_length_ms=30000) -> List[str]:
    """Split an audio file into consecutive WAV chunks written to *output_dir*.

    Args:
        input_path: Audio source accepted by ``AudioSegment.from_file``.
        output_dir: Directory that receives the chunks; created if missing.
        chunk_length_ms: Length of each chunk in milliseconds.

    Returns:
        The paths of the exported chunks, in playback order. Files are named
        ``chunk_<i>.wav`` with ``i`` starting at 0.
    """
    audio = AudioSegment.from_file(input_path)
    os.makedirs(output_dir, exist_ok=True)

    chunk_paths = []
    for i, chunk in enumerate(make_chunks(audio, chunk_length_ms)):
        chunk_name = os.path.join(output_dir, f"chunk_{i}.wav")
        chunk.export(chunk_name, format="wav")
        chunk_paths.append(chunk_name)
    return chunk_paths


def transcribir_audio(audio_path) -> str:
    """Run the ASR pipeline on one audio file and return its ``"text"`` field."""
    return modelo(audio_path)["text"]


def transcribir(audio_file: Optional[str]) -> str:
    """Transcribe an uploaded audio file chunk by chunk.

    Args:
        audio_file: Path of the uploaded audio, or ``None`` when the form is
            submitted without a file.

    Returns:
        The chunk transcripts joined with single spaces, or an empty string
        when no file was provided.
    """
    if not audio_file:
        return ""

    # A fresh temporary directory per call keeps leftovers from earlier runs
    # out of the transcript and stops concurrent requests from overwriting
    # each other's chunks; it is removed when the block exits.
    with tempfile.TemporaryDirectory(prefix="chunks_") as output_dir:
        # Iterate the paths in the order they were produced. Sorting the
        # directory listing would be lexicographic and would place
        # "chunk_10.wav" before "chunk_2.wav".
        chunk_paths = dividir_audio(audio_file, output_dir)
        transcripcion_final = [transcribir_audio(path) for path in chunk_paths]

    return " ".join(transcripcion_final)


interfaz = gr.Interface(
    fn=transcribir,
    # "filepath" hands the callback a path string, which is what
    # AudioSegment.from_file and the ASR pipeline are given here.
    inputs=gr.Audio(type="filepath", label="Sube tu archivo de audio"),
    outputs="text",
    title="Transcriptor de Audio",
)

# Launched at module level, as in the original script.
interfaz.launch()