Audio-to-Text / app.py
Blandskron's picture
Update app.py
043abd9 verified
raw
history blame
1.26 kB
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from pydub.utils import make_chunks
from transformers import pipeline
# ASR model: built once when the module is loaded and called by
# transcribir_audio for every chunk.
modelo = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-spanish",
)
def dividir_audio(input_path, output_dir, chunk_length_ms=30000):
    """Split an audio file into WAV chunks written to ``output_dir``.

    The input is loaded with ``AudioSegment.from_file`` and cut with
    ``make_chunks``; each piece is exported as ``chunk_<i>.wav`` where
    ``<i>`` is its zero-based position in the sequence.

    Args:
        input_path: Audio source handed to ``AudioSegment.from_file``.
        output_dir: Directory that receives the chunk files; it is created
            when it does not exist yet.
        chunk_length_ms: Chunk length passed to ``make_chunks``, in
            milliseconds (default 30000).

    Returns:
        None. The chunk files on disk are the result.
    """
    segmentos = make_chunks(AudioSegment.from_file(input_path), chunk_length_ms)
    # Create the directory only after the audio has been decoded, so a
    # failed load leaves nothing behind.
    os.makedirs(output_dir, exist_ok=True)
    for indice, segmento in enumerate(segmentos):
        destino = os.path.join(output_dir, f"chunk_{indice}.wav")
        segmento.export(destino, format="wav")
def transcribir_audio(audio_path):
    """Transcribe one audio file with the module-level ASR pipeline.

    Args:
        audio_path: Audio input forwarded unchanged to ``modelo``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    resultado = modelo(audio_path)
    return resultado["text"]
def _indice_de_chunk(nombre_archivo):
    """Return the integer index embedded in a ``chunk_<i>.wav`` file name."""
    raiz, _ = os.path.splitext(nombre_archivo)
    return int(raiz.rpartition("_")[2])


def transcribir(audio_file):
    """Transcribe an uploaded audio file chunk by chunk.

    The audio is split with ``dividir_audio`` into ``chunk_<i>.wav`` files,
    each chunk is transcribed with ``transcribir_audio``, and the partial
    transcripts are joined with single spaces in playback order.

    Args:
        audio_file: Audio input as delivered by the Gradio component, or
            ``None`` when the user submitted without providing audio.

    Returns:
        The full transcript as one string; ``""`` when ``audio_file`` is
        ``None``.
    """
    # Submitting the form without audio yields None; there is nothing to do.
    if audio_file is None:
        return ""

    # A fresh directory per call keeps chunks from an earlier (possibly
    # longer) upload out of this transcript, isolates concurrent requests,
    # and is removed automatically when the block exits.
    with tempfile.TemporaryDirectory(prefix="chunks_") as output_dir:
        dividir_audio(audio_file, output_dir)
        # Sort by numeric index: a plain lexicographic sort would place
        # "chunk_10.wav" before "chunk_2.wav" and scramble long recordings.
        nombres = sorted(
            (n for n in os.listdir(output_dir) if n.endswith(".wav")),
            key=_indice_de_chunk,
        )
        transcripcion_final = [
            transcribir_audio(os.path.join(output_dir, n)) for n in nombres
        ]
    return " ".join(transcripcion_final)
# Web UI: one audio upload in, the transcript out as plain text.
# type="filepath" makes Gradio pass the handler a path to the uploaded file;
# the former "file" value is not accepted by current Gradio releases.
interfaz = gr.Interface(
    fn=transcribir,
    inputs=gr.Audio(type="filepath", label="Sube tu archivo de audio"),
    outputs="text",
    title="Transcriptor de Audio",
)
# Build first, then launch (the documented Interface usage) rather than
# starting the server from inside a `with` block.
interfaz.launch()