# Scraped page header (not part of the program):
# VanguardAI's picture | Update app.py | d5685b0 verified | raw | history blame | 2.75 kB
import sounddevice as sd
import scipy.io.wavfile as wavfile
import numpy as np
import gradio as gr
from groq import Groq
import tempfile
import os
class Recorder:
    """Capture audio from an input device and export it as a WAV file.

    Recording is toggled on and off with :meth:`toggle_recording`; while it is
    on, every block delivered to :meth:`callback` is buffered in ``frames``.
    :meth:`save_audio` writes the buffered blocks to a temporary ``.wav`` file.

    Attributes:
        recording: True while incoming audio blocks are being buffered.
        frames: List of NumPy arrays, one per buffered audio block.
        sample_rate: Sample rate passed to the input stream and the WAV writer.
        channels: Number of input channels requested from the input stream.
        stream: The active ``sd.InputStream``, or None when idle.
    """

    def __init__(self, sample_rate=44100, channels=2):
        """Create an idle recorder.

        Args:
            sample_rate: Sampling frequency in Hz used for capture and export.
            channels: Number of input channels to open (defaults to 2, the
                value that was previously hard-coded).
        """
        self.recording = False
        self.frames = []
        self.sample_rate = sample_rate
        self.channels = channels
        self.stream = None

    def toggle_recording(self):
        """Start recording if idle, otherwise stop; return the new button label.

        Returns:
            "Recording... Press to Stop" after starting, or
            "Recording stopped. Press to Record" after stopping.

        Raises:
            Exception: Whatever the input stream raises on open/start/stop is
                propagated after the recorder state has been reset.
        """
        if not self.recording:
            self.frames = []
            stream = sd.InputStream(
                callback=self.callback,
                channels=self.channels,
                samplerate=self.sample_rate,
            )
            # Raise the flag before start(): callback() discards blocks while
            # the flag is False, so setting it afterwards loses the first ones.
            self.recording = True
            try:
                stream.start()
            except Exception:
                # Do not stay stuck in the "recording" state with a dead stream.
                self.recording = False
                stream.close()
                raise
            self.stream = stream
            return "Recording... Press to Stop"

        # Reset state first so a failing stop() cannot leave us "recording".
        self.recording = False
        stream, self.stream = self.stream, None
        if stream is not None:
            try:
                stream.stop()
            finally:
                stream.close()
        return "Recording stopped. Press to Record"

    def callback(self, indata, frames, time, status):
        """Buffer one block of input audio while recording is active.

        The signature is the one ``sd.InputStream`` is given as ``callback``.

        Args:
            indata: Array holding the captured block; copied before buffering
                so later changes to the caller's buffer cannot affect it.
            frames: Unused.
            time: Unused.
            status: Unused.
        """
        if self.recording:
            self.frames.append(indata.copy())

    def save_audio(self):
        """Write the buffered audio to a temporary WAV file.

        Returns:
            Path of the new ``.wav`` file, or None when nothing was buffered.
            The caller owns the file and is responsible for deleting it.
        """
        # Snapshot the list: callback() may still be appending to it.
        captured = list(self.frames)
        if not captured:
            return None

        audio_data = np.concatenate(captured, axis=0)
        # Reserve a unique path and close our handle first, so the WAV writer
        # is the only one with the file open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
            wav_path = temp_wav_file.name
        try:
            wavfile.write(wav_path, self.sample_rate, audio_data)
        except Exception:
            # delete=False means nobody else will clean this up on failure.
            os.remove(wav_path)
            raise
        return wav_path
# Module-level Recorder instance shared by record() and transcribe().
recorder = Recorder()
def record():
    """Toggle the shared recorder and return the status text it reports."""
    button_label = recorder.toggle_recording()
    return button_label
def transcribe():
    """Transcribe the audio buffered by the shared recorder using Groq.

    The buffered audio is exported to a temporary WAV file, uploaded to the
    ``whisper-large-v3`` transcription model, and the temporary file is
    removed afterwards whether or not the request succeeded.

    The API key is read from the ``GROQ_API_KEY`` environment variable; it
    must never be embedded in source code.

    Returns:
        The transcribed text, or one of these messages:
        "No audio recorded." when there is nothing to transcribe,
        "GROQ_API_KEY environment variable is not set." when no key is
        configured, or "Transcription text not found." when the response
        carries no ``text``.

    Raises:
        Exception: Errors raised by the Groq client are propagated after the
            temporary file has been cleaned up.
    """
    audio_file = recorder.save_audio()
    if not audio_file:
        return "No audio recorded."

    try:
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            return "GROQ_API_KEY environment variable is not set."

        client = Groq(api_key=api_key)
        with open(audio_file, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(audio_file, file.read()),
                model="whisper-large-v3",
                prompt="Specify context or spelling",  # Optional
                response_format="json",  # Optional
                language="en",  # Optional
                temperature=0.0,  # Optional
            )
    finally:
        # Runs on success, early return, and failure alike, so the temporary
        # recording never outlives this call.
        os.remove(audio_file)

    text = getattr(transcription, "text", None)
    if text is None:
        return "Transcription text not found."
    return text
def _transcribe_when_stopped():
    """Transcribe only after a click that stopped the recorder.

    The same button both starts and stops recording. On the click that starts
    a recording there is nothing meaningful to transcribe yet, so the textbox
    is left untouched.
    """
    if recorder.recording:
        return gr.update()
    return transcribe()


# Single-button UI: each click toggles recording, and the transcription step
# is chained with .then() so it always runs after the toggle has finished
# instead of racing it as a second independent click listener.
with gr.Blocks() as gradio_interface:
    with gr.Column():
        record_button = gr.Button("Press to Record")
        transcription_output = gr.Textbox(label="Transcription")
        record_button.click(fn=record, outputs=record_button).then(
            fn=_transcribe_when_stopped, outputs=transcription_output
        )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    gradio_interface.launch()