# Page-status residue captured with this file ("Spaces: Sleeping"); not part of the program.
import gradio as gr
import whisper
from transformers import pipeline
# Speech-to-text: the "small" Whisper checkpoint, placed on the CPU.
whisper_model = whisper.load_model(
    "small",
    device="cpu",
)

# Text correction: a Transformers pipeline around Falcon3-1B-Instruct.
# device=-1 is the Transformers convention for running on the CPU.
correction_pipeline = pipeline(
    task="text2text-generation",
    model="tiiuae/falcon3-1b-instruct",
    device=-1,
)
# Speech-to-text step of the demo.
def transcribe_audio(audio_file):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio_file: Audio input forwarded unchanged to
            ``whisper_model.transcribe`` (the UI supplies a file path).

    Returns:
        The value stored under the ``"text"`` key of Whisper's result.
    """
    result = whisper_model.transcribe(audio_file)
    text = result["text"]
    return text
# Helper that splits long text into word-bounded chunks.
def chunk_text(text, max_tokens=2000):
    """Split ``text`` into chunks of at most ``max_tokens`` words each.

    A "token" here is a whitespace-separated word, not a model tokenizer
    token, so the limit is only a rough proxy for the model's real input
    size.

    Args:
        text: The text to split. Runs of whitespace are collapsed, because
            the words are re-joined with single spaces.
        max_tokens: Maximum number of words per chunk. Values below 1 are
            treated as 1 so that no empty chunk is ever produced.

    Returns:
        A list of chunk strings in their original order; an empty list when
        ``text`` contains no words.
    """
    words = text.split()
    # Clamp the step: range() rejects a step of 0, and a non-positive limit
    # can only sensibly mean "one word per chunk".
    step = max(1, max_tokens)
    return [
        " ".join(words[start:start + step])
        for start in range(0, len(words), step)
    ]
# Tunables for the grammar-correction step.
# NOTE(review): these values are starting points, not measured optima.
# The chunk size is kept small so that the prompt plus the generated text
# stay well inside a small model's context window.
_CORRECTION_CHUNK_WORDS = 200
_CORRECTION_MAX_NEW_TOKENS = 512
_CORRECTION_PROMPT = (
    "Correct the grammar and spelling of the following text. "
    "Reply with the corrected text only.\n\n"
    "Text: {text}\n\n"
    "Corrected text:"
)


# Function to correct grammar in text
def correct_text(text):
    """Grammar-correct ``text`` chunk by chunk with ``correction_pipeline``.

    Args:
        text: The raw text to correct.

    Returns:
        The corrected chunks joined with single spaces, or ``""`` when
        ``text`` is empty or whitespace-only.
    """
    if not text or not text.strip():
        return ""

    corrected_parts = []
    for chunk in chunk_text(text, max_tokens=_CORRECTION_CHUNK_WORDS):
        prompt = _CORRECTION_PROMPT.format(text=chunk)
        outputs = correction_pipeline(
            prompt,
            max_new_tokens=_CORRECTION_MAX_NEW_TOKENS,
        )
        generated = outputs[0]["generated_text"]
        # A decoder-only model may echo the prompt ahead of its answer;
        # keep only the continuation when that happens.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        generated = generated.strip()
        # Fall back to the uncorrected chunk rather than dropping content
        # when the model returns nothing usable.
        corrected_parts.append(generated or chunk)
    return " ".join(corrected_parts)


# Function to process the pipeline
def process_pipeline(audio_file):
    """Transcribe ``audio_file`` and grammar-correct the transcription.

    Args:
        audio_file: Audio input handed to ``transcribe_audio``. ``None`` is
            accepted (presumably what the UI sends when nothing was
            uploaded -- verify against Gradio's behavior).

    Returns:
        A ``(raw_transcription, corrected_transcription)`` tuple; both are
        ``""`` when ``audio_file`` is ``None``.
    """
    if audio_file is None:
        return "", ""
    raw_transcription = transcribe_audio(audio_file)
    corrected_transcription = correct_text(raw_transcription)
    return raw_transcription, corrected_transcription
# Gradio UI: one audio upload in, two text boxes out.
_audio_input = gr.Audio(type="filepath", label="Upload Audio")
_text_outputs = [
    gr.Textbox(label="Raw Transcription"),
    gr.Textbox(label="Corrected Transcription"),
]

interface = gr.Interface(
    fn=process_pipeline,
    inputs=_audio_input,
    outputs=_text_outputs,
    title="Speech Correction Demo",
    description="Upload an audio file to see raw transcription and grammar-corrected output.",
)

# Start the app; share=True asks Gradio for a publicly shareable link.
interface.launch(share=True)