Porjaz committed on
Commit
57bd602
1 Parent(s): 9176320

Update custom_interface_app.py

Browse files
Files changed (1) hide show
  1. custom_interface_app.py +12 -12
custom_interface_app.py CHANGED
@@ -126,17 +126,17 @@ class ASR(Pretrained):
126
  return waveform
127
 
128
 
129
- def classify_file_w2v2(self, path, device):
130
  # Load the audio file
131
- waveform, sr = librosa.load(path, sr=16000)
132
 
133
  # Get audio length in seconds
134
- audio_length = len(waveform) / sr
135
 
136
  if audio_length >= 20:
137
  # split audio every 20 seconds
138
  segments = []
139
- max_duration = 20 * sr # Maximum segment duration in samples (20 seconds)
140
  num_segments = int(np.ceil(len(waveform) / max_duration))
141
  start = 0
142
  for i in range(num_segments):
@@ -144,7 +144,7 @@ class ASR(Pretrained):
144
  if end > len(waveform):
145
  end = len(waveform)
146
  segment_part = waveform[start:end]
147
- segment_len = len(segment_part) / sr
148
  if segment_len < 1:
149
  continue
150
  segments.append(segment_part)
@@ -216,23 +216,23 @@ class ASR(Pretrained):
216
 
217
 
218
 
219
- def classify_file_whisper(self, path, pipe, device):
220
- waveform, sr = librosa.load(path, sr=16000)
221
  transcription = pipe(waveform, generate_kwargs={"language": "macedonian"})["text"]
222
  return transcription
223
 
224
 
225
- def classify_file_mms(self, path, processor, model, device):
226
  # Load the audio file
227
- waveform, sr = librosa.load(path, sr=16000)
228
 
229
  # Get audio length in seconds
230
- audio_length = len(waveform) / sr
231
 
232
  if audio_length >= 20:
233
  # split audio every 20 seconds
234
  segments = []
235
- max_duration = 20 * sr # Maximum segment duration in samples (20 seconds)
236
  num_segments = int(np.ceil(len(waveform) / max_duration))
237
  start = 0
238
  for i in range(num_segments):
@@ -240,7 +240,7 @@ class ASR(Pretrained):
240
  if end > len(waveform):
241
  end = len(waveform)
242
  segment_part = waveform[start:end]
243
- segment_len = len(segment_part) / sr
244
  if segment_len < 1:
245
  continue
246
  segments.append(segment_part)
 
126
  return waveform
127
 
128
 
129
+ def classify_file_w2v2(self, waveform, device):
130
  # Load the audio file
131
+ # waveform, sr = librosa.load(path, sr=16000)
132
 
133
  # Get audio length in seconds
134
+ audio_length = len(waveform) / 16000
135
 
136
  if audio_length >= 20:
137
  # split audio every 20 seconds
138
  segments = []
139
+ max_duration = 20 * 16000 # Maximum segment duration in samples (20 seconds)
140
  num_segments = int(np.ceil(len(waveform) / max_duration))
141
  start = 0
142
  for i in range(num_segments):
 
144
  if end > len(waveform):
145
  end = len(waveform)
146
  segment_part = waveform[start:end]
147
+ segment_len = len(segment_part) / 16000
148
  if segment_len < 1:
149
  continue
150
  segments.append(segment_part)
 
216
 
217
 
218
 
219
+ def classify_file_whisper(self, waveform, pipe, device):
220
+ # waveform, sr = librosa.load(path, sr=16000)
221
  transcription = pipe(waveform, generate_kwargs={"language": "macedonian"})["text"]
222
  return transcription
223
 
224
 
225
+ def classify_file_mms(self, waveform, processor, model, device):
226
  # Load the audio file
227
+ # waveform, sr = librosa.load(path, sr=16000)
228
 
229
  # Get audio length in seconds
230
+ audio_length = len(waveform) / 16000
231
 
232
  if audio_length >= 20:
233
  # split audio every 20 seconds
234
  segments = []
235
+ max_duration = 20 * 16000 # Maximum segment duration in samples (20 seconds)
236
  num_segments = int(np.ceil(len(waveform) / max_duration))
237
  start = 0
238
  for i in range(num_segments):
 
240
  if end > len(waveform):
241
  end = len(waveform)
242
  segment_part = waveform[start:end]
243
+ segment_len = len(segment_part) / 16000
244
  if segment_len < 1:
245
  continue
246
  segments.append(segment_part)