Porjaz committed on
Commit
57bd602
1 Parent(s): 9176320

Update custom_interface_app.py

Browse files
Files changed (1) hide show
  1. custom_interface_app.py +12 -12
custom_interface_app.py CHANGED
@@ -126,17 +126,17 @@ class ASR(Pretrained):
126
  return waveform
127
 
128
 
129
- def classify_file_w2v2(self, path, device):
130
  # Load the audio file
131
- waveform, sr = librosa.load(path, sr=16000)
132
 
133
  # Get audio length in seconds
134
- audio_length = len(waveform) / sr
135
 
136
  if audio_length >= 20:
137
  # split audio every 20 seconds
138
  segments = []
139
- max_duration = 20 * sr # Maximum segment duration in samples (20 seconds)
140
  num_segments = int(np.ceil(len(waveform) / max_duration))
141
  start = 0
142
  for i in range(num_segments):
@@ -144,7 +144,7 @@ class ASR(Pretrained):
144
  if end > len(waveform):
145
  end = len(waveform)
146
  segment_part = waveform[start:end]
147
- segment_len = len(segment_part) / sr
148
  if segment_len < 1:
149
  continue
150
  segments.append(segment_part)
@@ -216,23 +216,23 @@ class ASR(Pretrained):
216
 
217
 
218
 
219
- def classify_file_whisper(self, path, pipe, device):
220
- waveform, sr = librosa.load(path, sr=16000)
221
  transcription = pipe(waveform, generate_kwargs={"language": "macedonian"})["text"]
222
  return transcription
223
 
224
 
225
- def classify_file_mms(self, path, processor, model, device):
226
  # Load the audio file
227
- waveform, sr = librosa.load(path, sr=16000)
228
 
229
  # Get audio length in seconds
230
- audio_length = len(waveform) / sr
231
 
232
  if audio_length >= 20:
233
  # split audio every 20 seconds
234
  segments = []
235
- max_duration = 20 * sr # Maximum segment duration in samples (20 seconds)
236
  num_segments = int(np.ceil(len(waveform) / max_duration))
237
  start = 0
238
  for i in range(num_segments):
@@ -240,7 +240,7 @@ class ASR(Pretrained):
240
  if end > len(waveform):
241
  end = len(waveform)
242
  segment_part = waveform[start:end]
243
- segment_len = len(segment_part) / sr
244
  if segment_len < 1:
245
  continue
246
  segments.append(segment_part)
 
126
  return waveform
127
 
128
 
129
+ def classify_file_w2v2(self, waveform, device):
130
  # Load the audio file
131
+ # waveform, sr = librosa.load(path, sr=16000)
132
 
133
  # Get audio length in seconds
134
+ audio_length = len(waveform) / 16000
135
 
136
  if audio_length >= 20:
137
  # split audio every 20 seconds
138
  segments = []
139
+ max_duration = 20 * 16000 # Maximum segment duration in samples (20 seconds)
140
  num_segments = int(np.ceil(len(waveform) / max_duration))
141
  start = 0
142
  for i in range(num_segments):
 
144
  if end > len(waveform):
145
  end = len(waveform)
146
  segment_part = waveform[start:end]
147
+ segment_len = len(segment_part) / 16000
148
  if segment_len < 1:
149
  continue
150
  segments.append(segment_part)
 
216
 
217
 
218
 
219
+ def classify_file_whisper(self, waveform, pipe, device):
220
+ # waveform, sr = librosa.load(path, sr=16000)
221
  transcription = pipe(waveform, generate_kwargs={"language": "macedonian"})["text"]
222
  return transcription
223
 
224
 
225
+ def classify_file_mms(self, waveform, processor, model, device):
226
  # Load the audio file
227
+ # waveform, sr = librosa.load(path, sr=16000)
228
 
229
  # Get audio length in seconds
230
+ audio_length = len(waveform) / 16000
231
 
232
  if audio_length >= 20:
233
  # split audio every 20 seconds
234
  segments = []
235
+ max_duration = 20 * 16000 # Maximum segment duration in samples (20 seconds)
236
  num_segments = int(np.ceil(len(waveform) / max_duration))
237
  start = 0
238
  for i in range(num_segments):
 
240
  if end > len(waveform):
241
  end = len(waveform)
242
  segment_part = waveform[start:end]
243
+ segment_len = len(segment_part) / 16000
244
  if segment_len < 1:
245
  continue
246
  segments.append(segment_part)