jonatasgrosman
/

wav2vec2-large-xlsr-53-arabic

Automatic Speech Recognition

xlsr-fine-tuning-week

Inference Endpoints

Model card Files Files and versions Community

bilal6913 committed on Sep 27, 2024

Commit

0b7d904

·

verified ·

1 Parent(s): af46c2d

Create app.py

Files changed (1) hide show

app.py +63 -0

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from flask import Flask, request, jsonify, render_template
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import torch
+import torchaudio
+import os
+# Initialize Flask app
+app = Flask(__name__)
+# Load the model and processor
+model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
+processor = Wav2Vec2Processor.from_pretrained(model_name)
+model = Wav2Vec2ForCTC.from_pretrained(model_name)
+# Define the upload folder
+UPLOAD_FOLDER = 'uploads'
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+# Ensure the upload folder exists
+if not os.path.exists(UPLOAD_FOLDER):
+    os.makedirs(UPLOAD_FOLDER)
+@app.route('/')
+def index():
+    return render_template('index.html')
+@app.route('/transcribe', methods=['POST'])
+def transcribe_audio():
+    if 'file' not in request.files:
+        return jsonify({'error': 'No file part'}), 400
+    file = request.files['file']
+    if file.filename == '':
+        return jsonify({'error': 'No selected file'}), 400
+    if file:
+        # Save the uploaded file
+        file_path = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
+        file.save(file_path)
+        # Load the audio file
+        speech_array, sampling_rate = torchaudio.load(file_path)
+        speech_array = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)(speech_array)
+        # Process the audio input
+        input_values = processor(speech_array.squeeze().numpy(), return_tensors="pt", sampling_rate=16000).input_values
+        # Perform inference
+        with torch.no_grad():
+            logits = model(input_values).logits
+        # Get the predicted transcription
+        predicted_ids = torch.argmax(logits, dim=-1)
+        transcription = processor.batch_decode(predicted_ids)
+        return jsonify({'transcription': transcription[0]})
+    return jsonify({'error': 'Something went wrong!'}), 500
+if __name__ == '__main__':
+    app.run(debug=True)