unijoh committed
Commit e7e280b · verified · 1 Parent(s): 1bfb695

Update asr.py

Files changed (1)
  1. asr.py +52 -101
asr.py CHANGED
@@ -1,104 +1,55 @@
-import gradio as gr
-import time
-from transformers import pipeline
 import torch
-import ffmpeg # Make sure it's ffmpeg-python
-
-def main():
-    # Check if GPU is available
-    use_gpu = torch.cuda.is_available()
-
-    # Configure the pipeline to use the GPU if available
-    if use_gpu:
-        p = pipeline(
-            "automatic-speech-recognition",
-            model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h",
-            device=0
-        )
-    else:
-        p = pipeline(
-            "automatic-speech-recognition",
-            model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
-        )
-
-    def extract_audio_from_m3u8(url):
-        try:
-            output_file = "output_audio.aac"
-            ffmpeg.input(url).output(output_file).run(overwrite_output=True)
-            return output_file
-        except Exception as e:
-            return f"An error occurred: {e}"
-
-    def transcribe_function(audio, state, uploaded_audio, m3u8_url):
-        if m3u8_url:
-            audio = extract_audio_from_m3u8(m3u8_url)
-
-        if uploaded_audio is not None:
-            audio = uploaded_audio
-
-        if not audio:
-            # Return a meaningful message; no audio found
-            return {state_var: state, transcription_var: state}
-
         try:
-            time.sleep(3)
-            text = p(audio, chunk_length_s=50)["text"]
-            state += text + "\n"
-            return {state_var: state, transcription_var: state}
         except Exception as e:
-            return {
-                transcription_var: "An error occurred during transcription.",
-                state_var: state
-            }
-
-    def reset_output(transcription, state):
-        """Function to reset the state to an empty string."""
-        return "", ""
-
-    with gr.Blocks() as demo:
-        state_var = gr.State("")
-
-        with gr.Row():
-            with gr.Column():
-                microphone = gr.Audio(
-                    source="microphone",
-                    type="filepath",
-                    label="Microphone"
-                )
-                uploaded_audio = gr.Audio(
-                    label="Upload Audio File",
-                    type="filepath",
-                    source="upload"
-                )
-                m3u8_url = gr.Textbox(
-                    label="m3u8 URL | E.g.: from kvf.fo or logting.fo"
-                )
-
-            with gr.Column():
-                transcription_var = gr.Textbox(
-                    type="text",
-                    label="Transcription",
-                    readonly=True
-                )
-
-        with gr.Row():
-            transcribe_button = gr.Button("Transcribe")
-            reset_button = gr.Button("Reset output")
-
-        transcribe_button.click(
-            transcribe_function,
-            [microphone, state_var, uploaded_audio, m3u8_url],
-            [transcription_var, state_var]
-        )
-
-        reset_button.click(
-            reset_output,
-            [transcription_var, state_var],
-            [transcription_var, state_var]
-        )
-
-    # Launch with the latest Gradio features
-    demo.launch()
-
-if __name__ == "__main__":
-    main()

+import librosa
+from transformers import AutoProcessor, Wav2Vec2ForCTC
 import torch
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.DEBUG)
+
+ASR_SAMPLING_RATE = 16_000
+MODEL_ID = "facebook/mms-1b-all"
+
+try:
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+    model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
+    logging.info("ASR model and processor loaded successfully.")
+except Exception as e:
+    logging.error(f"Error loading ASR model or processor: {e}")
+
+def transcribe(audio):
+    try:
+        if audio is None:
+            logging.error("No audio file provided")
+            return "ERROR: You have to either use the microphone or upload an audio file"
+
+        logging.info(f"Loading audio file: {audio}")
+
+        # Try loading the audio file with librosa
         try:
+            audio_samples, _ = librosa.load(audio, sr=ASR_SAMPLING_RATE, mono=True)
+        except FileNotFoundError:
+            logging.error("Audio file not found")
+            return "ERROR: Audio file not found"
         except Exception as e:
+            logging.error(f"Error loading audio file with librosa: {e}")
+            return f"ERROR: Unable to load audio file - {e}"
+
+        # Set the language for the processor to Faroese
+        lang_code = "fao"
+        processor.tokenizer.set_target_lang(lang_code)
+        model.load_adapter(lang_code)
+
+        # Process the audio with the processor
+        inputs = processor(audio_samples, sampling_rate=ASR_SAMPLING_RATE, return_tensors="pt")
+
+        with torch.no_grad():
+            outputs = model(**inputs).logits
+
+        ids = torch.argmax(outputs, dim=-1)[0]
+        transcription = processor.decode(ids)
+
+        logging.info("Transcription completed successfully.")
+        return transcription
+    except Exception as e:
+        logging.error(f"Error during transcription: {e}")
+        return "ERROR"
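
For quick local testing, a minimal usage sketch of the new transcribe() function follows; the import path, the __main__ guard, and the file name "sample.wav" are illustrative assumptions and are not part of this commit:

# Hypothetical usage sketch (not part of the commit): transcribe a local
# recording with the Faroese ("fao") MMS adapter and print the result.
# "sample.wav" is a placeholder path; any file librosa can read should work.
from asr import transcribe

if __name__ == "__main__":
    print(transcribe("sample.wav"))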