allandclive committed on
Commit
f01f4ac
·
1 Parent(s): 52f0ba0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -22
app.py CHANGED
@@ -5,9 +5,12 @@ import json
5
  from transformers import pipeline
6
  from stitched_model import CombinedModel
7
 
 
8
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
9
 
10
- model = CombinedModel("indonesian-nlp/wav2vec2-luganda", "Sunbird/sunbird-mul-en-mbart-merged", device=device)
 
 
11
 
12
  def transcribe(audio_file_mic=None, audio_file_upload=None):
13
  if audio_file_mic:
@@ -17,27 +20,16 @@ def transcribe(audio_file_mic=None, audio_file_upload=None):
17
  else:
18
  return "Please upload an audio file or record one"
19
 
20
- # Load the audio file
21
- speech, sample_rate = librosa.load(audio_file, sr=16000, mono=True)
22
-
23
- # Split the audio into 10-second chunks
24
- chunk_size = 10 * 16000
25
- chunks = [speech[i:i + chunk_size] for i in range(0, len(speech), chunk_size)]
26
-
27
- # Process each chunk and concatenate the results
28
- transcriptions = []
29
- translations = []
30
- for chunk in chunks:
31
- chunk = torch.tensor([chunk])
32
- with torch.no_grad():
33
- transcription, translation = model({"audio": chunk})
34
- transcriptions.append(transcription)
35
- translations.append(translation[0])
36
-
37
- transcription = "".join(transcriptions)
38
- translation = "".join(translations)
39
 
40
- return transcription, translation
 
 
 
41
 
42
  description = '''Luganda to English Speech Translation'''
43
 
@@ -50,4 +42,4 @@ iface = gr.Interface(fn=transcribe,
50
  ],
51
  description=description
52
  )
53
- iface.launch()
 
5
  from transformers import pipeline
6
  from stitched_model import CombinedModel
7
 
8
+
9
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
10
 
11
+ model = CombinedModel("facebook/mms-1b-all", "Sunbird/sunbird-mul-en-mbart-merged", device="cpu")
12
+
13
+
14
 
15
  def transcribe(audio_file_mic=None, audio_file_upload=None):
16
  if audio_file_mic:
 
20
  else:
21
  return "Please upload an audio file or record one"
22
 
23
+ # Make sure audio is 16kHz
24
+ speech, sample_rate = librosa.load(audio_file)
25
+ if sample_rate != 16000:
26
+ speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
27
+ speech = torch.tensor([speech])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ with torch.no_grad():
30
+ transcription, translation = model({"audio":speech})
31
+
32
+ return transcription, translation[0]
33
 
34
  description = '''Luganda to English Speech Translation'''
35
 
 
42
  ],
43
  description=description
44
  )
45
+ iface.launch()