SanyaAhmed committed on
Commit
82e14f1
•
1 Parent(s): dbd0fa6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit app: upload audio files and transcribe them with OpenAI Whisper.
import streamlit as st
import torch
import numpy as np
import librosa
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline
from io import BytesIO

# Configure Streamlit page settings
st.set_page_config(
    page_title="Transcribe with Whisper",
    page_icon=":rocket:",
    layout="centered",
)

st.title("🎙️ Whisper Audio Transcriber")
st.divider()

# Set up device and data type: half precision on GPU, full precision on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_name = "openai/whisper-large-v3"


@st.cache_resource(show_spinner=False)
def load_asr_pipeline():
    """Build the Whisper automatic-speech-recognition pipeline.

    Streamlit re-executes this script from top to bottom on every user
    interaction. Wrapping the load in ``st.cache_resource`` keeps a single
    pipeline object alive across reruns instead of reloading the model
    weights each time.

    Returns:
        The ``transformers`` ASR pipeline built from ``model_name`` and
        placed on ``device`` with ``torch_dtype``.
    """
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_name,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )
    model.to(device)
    processor = AutoProcessor.from_pretrained(model_name)

    return pipeline(
        task="automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        torch_dtype=torch_dtype,
        device=device,
    )


# Load the Whisper model and processor (only the first run pays the cost).
with st.spinner("🚀 Loading Whisper model... please wait!"):
    asr_pipe = load_asr_pipeline()

st.markdown("Upload your audio files, and let the Whisper model transcribe them instantly. 🚀")

# File uploader for audio files
uploaded_files = st.file_uploader(
    "📂 Select audio files to transcribe",
    type=["wav", "mp3"],
    accept_multiple_files=True,
)

# Transcription button and result display
if uploaded_files:
    if st.button("✍️ Transcribe"):
        results = []

        for audio_file in uploaded_files:
            try:
                # getvalue() returns the whole upload regardless of the
                # current stream position, unlike read().
                audio_bytes = audio_file.getvalue()

                # Decode and resample to 16 kHz before inference.
                audio_data, sr = librosa.load(BytesIO(audio_bytes), sr=16000)

                # Run ASR pipeline
                # NOTE(review): long recordings may need chunk_length_s or
                # return_timestamps=True -- confirm against the installed
                # transformers version.
                result = asr_pipe(audio_data)
                transcription = result["text"]
                results.append((audio_file.name, transcription))
            except Exception as e:
                # UI boundary: report the failure and continue with the
                # remaining files instead of aborting the whole batch.
                st.error(f"Error processing '{audio_file.name}':{e}")

        # Display results
        st.subheader("Transcriptions")
        for position, (filename, transcription) in enumerate(results):
            # An explicit key keeps widgets distinct even when two uploads
            # share the same filename.
            st.text_area(
                f"📂 **{filename}**:",
                value=transcription,
                key=f"transcription_{position}",
            )
else:
    st.info("📤 Please upload audio files to start transcription.")