rawanahmed committed
Commit 8c1ac08 · verified · 1 Parent(s): 2753d4e

Update app.py

Files changed (1): app.py (+37 −37)
app.py CHANGED
@@ -1,37 +1,37 @@
- import gradio as gr
- from datasets import load_dataset
- import torch
- from transformers import SpeechT5ForSpeechToText, SpeechT5Processor
-
- # Load the English subset of the VoxPopuli dataset
- dataset = load_dataset("VoxPopuli", "en")
-
- # Example function to load audio and transcriptions
- def get_sample(dataset):
-     # Get a random sample from the training set
-     sample = dataset['train'][0]  # You can modify to pick a random sample or any sample index
-     audio_file = sample["audio"]["path"]
-     transcription = sample["sentence"]
-     return audio_file, transcription
-
- # Initialize the SpeechT5 model and processor
- processor = SpeechT5Processor.from_pretrained("facebook/speech_t5_base")
- model = SpeechT5ForSpeechToText.from_pretrained("facebook/speech_t5_base")
-
- # Example Gradio interface function
- def transcribe(audio):
-     # Process the audio and get transcription
-     inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
-     with torch.no_grad():
-         logits = model(**inputs).logits
-     transcription = processor.decode(logits[0], skip_special_tokens=True)
-     return transcription
-
- # Load a sample to check if everything is set up
- audio_file, transcription = get_sample(dataset)
-
- # Set up Gradio interface
- iface = gr.Interface(fn=transcribe, inputs=gr.Audio(source="upload", type="filepath"), outputs="text")
-
- # Launch the interface
- iface.launch()
 
+ import gradio as gr
+ from datasets import load_dataset
+ import torch
+ from transformers import SpeechT5ForSpeechToText, SpeechT5Processor
+
+ # Load the English subset of the VoxPopuli dataset
+ dataset = load_dataset("voxPopuli", "en")
+
+ # Example function to load audio and transcriptions
+ def get_sample(dataset):
+     # Get a random sample from the training set
+     sample = dataset['train'][0]  # You can modify to pick a random sample or any sample index
+     audio_file = sample["audio"]["path"]
+     transcription = sample["sentence"]
+     return audio_file, transcription
+
+ # Initialize the SpeechT5 model and processor
+ processor = SpeechT5Processor.from_pretrained("facebook/speech_t5_base")
+ model = SpeechT5ForSpeechToText.from_pretrained("facebook/speech_t5_base")
+
+ # Example Gradio interface function
+ def transcribe(audio):
+     # Process the audio and get transcription
+     inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
+     with torch.no_grad():
+         logits = model(**inputs).logits
+     transcription = processor.decode(logits[0], skip_special_tokens=True)
+     return transcription
+
+ # Load a sample to check if everything is set up
+ audio_file, transcription = get_sample(dataset)
+
+ # Set up Gradio interface
+ iface = gr.Interface(fn=transcribe, inputs=gr.Audio(source="upload", type="filepath"), outputs="text")
+
+ # Launch the interface
+ iface.launch()
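
The commit only changes the dataset identifier from "VoxPopuli" to "voxPopuli". Neither string matches a Hub repository, and a few other identifiers in the script also look off: the VoxPopuli dataset is published as facebook/voxpopuli and stores transcripts under "normalized_text" (not "sentence"), there is no facebook/speech_t5_base checkpoint (the speech-to-text SpeechT5 checkpoint is microsoft/speecht5_asr), and SpeechT5ForSpeechToText is a sequence-to-sequence model, so transcription goes through generate() rather than an argmax over logits. Below is a minimal corrected sketch under those assumptions, with librosa added to load the uploaded file at 16 kHz; it is not the committed code.

import gradio as gr
import librosa
import torch
from datasets import load_dataset
from transformers import SpeechT5ForSpeechToText, SpeechT5Processor

# VoxPopuli lives on the Hub as facebook/voxpopuli; stream the English split
# so the full dataset is not downloaded just to grab one sample.
dataset = load_dataset("facebook/voxpopuli", "en", split="train", streaming=True)

# SpeechT5 checkpoint fine-tuned for ASR.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_asr")
model = SpeechT5ForSpeechToText.from_pretrained("microsoft/speecht5_asr")

def get_sample(ds):
    # Pull one example; VoxPopuli keeps the transcript in "normalized_text".
    sample = next(iter(ds))
    return sample["audio"]["path"], sample["normalized_text"]

def transcribe(audio_path):
    # Load and resample the uploaded file to the 16 kHz rate SpeechT5 expects.
    speech, _ = librosa.load(audio_path, sr=16000)
    inputs = processor(audio=speech, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        # Seq2seq ASR: decode with generate() instead of taking raw logits.
        predicted_ids = model.generate(**inputs, max_length=200)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

# Sanity-check that a sample can be fetched before launching the UI.
audio_file, transcription = get_sample(dataset)

iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),  # upload is the default source in recent Gradio
    outputs="text",
)
iface.launch()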