# Hugging Face Space (status when this file was captured: Runtime error).
# Run locally with: uvicorn app789:app --host 0.0.0.0 --port 8000 --reload
import io
import json
import os

import librosa
import requests
import textwrap3
import whisper
from fastapi import FastAPI, Form, HTTPException, UploadFile
from fastapi.responses import HTMLResponse
# Load the Whisper "medium" checkpoint once at import time; the request
# handlers below use this shared module-level instance.
model = whisper.load_model("medium")

# ASGI application object (served with e.g. `uvicorn app789:app`).
app = FastAPI()
# from faster_whisper import WhisperModel | |
# model_size = "medium" | |
# ts_model = WhisperModel(model_size, device="cpu", compute_type="int8") | |
@app.get("/", response_class=HTMLResponse)
def read_root():
    """Serve the HTML upload form for the transcription service.

    The form posts ``multipart/form-data`` to ``/transcribe`` with two
    fields: ``audio_file`` (the uploaded MP3/WAV) and ``tgt_lang`` (the
    language code chosen in the drop-down).

    Returns:
        HTMLResponse: the form page with status code 200.
    """
    # The <select> itself carries name="tgt_lang", so it is the single source
    # of the submitted language code. A second hidden field with the same
    # name (kept in sync by JavaScript) used to be posted as well, which sent
    # the field twice and ignored the user's choice when scripts were off.
    html_form = """
<html>
<body>
<h2>Audio Transcription</h2>
<form action="/transcribe" method="post" enctype="multipart/form-data">
<label for="audio_file">Upload an audio file (MP3 or WAV):</label>
<input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
<label for="language_select">Select Target Language:</label>
<select id="language_select" name="tgt_lang">
<option value="fr_XX">French</option>
<option value="es_XX">Spanish</option>
<option value="de_DE">German</option>
<option value="hi_IN">Hindi</option>
<option value="en_XX">English</option>
<option value="ja_XX">Japanese</option>
<option value="ne_NP">Nepali</option>
<option value="zh_CN">Chinese</option>
<option value="pt_XX">Portuguese</option>
<!-- Add more language options here -->
</select><br><br>
<input type="submit" value="Transcribe">
</form>
</body>
</html>
"""
    return HTMLResponse(content=html_form, status_code=200)
# Hugging Face Inference API endpoint of the mBART-50 many-to-many model.
_TRANSLATION_API_URL = (
    "https://api-inference.huggingface.co/pipeline/translation/"
    "facebook/mbart-large-50-many-to-many-mmt"
)
# Environment variable that must hold the Hugging Face access token.
# Credentials must never be committed to source control.
_HF_TOKEN_ENV_VAR = "HF_API_TOKEN"
# Maximum number of characters sent to the translation API per request.
_CHUNK_WIDTH = 100
# Seconds to wait for the translation API before giving up.
_TRANSLATION_TIMEOUT_SECONDS = 60
# Target languages written without spaces between words; their translated
# chunks are concatenated directly instead of being joined with a space.
_UNSPACED_TARGET_LANGS = frozenset({"ja_XX", "zh_CN"})


def _translate_chunk(text, tgt_lang, headers):
    """Translate one English text chunk via the Hugging Face Inference API.

    Args:
        text: English text to translate.
        tgt_lang: mBART-50 language code of the target language.
        headers: HTTP headers carrying the ``Authorization`` bearer token.

    Returns:
        The ``translation_text`` of the first result returned by the API.

    Raises:
        HTTPException: 502 when the API is unreachable, answers with a
            non-success status, or returns an unexpected payload.
    """
    payload = {
        "inputs": text,
        "parameters": {"src_lang": "en_XX", "tgt_lang": tgt_lang},
    }
    try:
        response = requests.post(
            _TRANSLATION_API_URL,
            headers=headers,
            json=payload,
            timeout=_TRANSLATION_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Translation service unreachable: {exc}",
        ) from exc

    try:
        body = response.json()
    except ValueError as exc:
        raise HTTPException(
            status_code=502,
            detail="Translation service returned a non-JSON response.",
        ) from exc

    # On failure the API answers with something other than the expected
    # [{"translation_text": ...}] list; surface it instead of crashing on
    # body[0].
    well_formed = (
        response.ok
        and isinstance(body, list)
        and body
        and isinstance(body[0], dict)
        and "translation_text" in body[0]
    )
    if not well_formed:
        raise HTTPException(
            status_code=502,
            detail=f"Translation service error: {body}",
        )
    return body[0]["translation_text"]


@app.post("/transcribe")
async def transcribe_audio(audio_file: UploadFile, tgt_lang: str = Form(...)):
    """Transcribe an uploaded audio file and translate it to ``tgt_lang``.

    The audio is decoded at 16 kHz and passed to the module-level Whisper
    model with ``task="translate"``; the resulting text is treated as English.
    For ``tgt_lang == "en_XX"`` that text is returned directly. Otherwise it
    is split into chunks of at most ``_CHUNK_WIDTH`` characters, each chunk is
    translated through the Hugging Face Inference API, and the translated
    chunks are joined back together.

    Args:
        audio_file: Uploaded audio file (form field ``audio_file``).
        tgt_lang: mBART-50 target language code (form field ``tgt_lang``).

    Returns:
        The transcribed (and, if requested, translated) text.

    Raises:
        HTTPException: 500 when a translation is requested but the
            ``HF_API_TOKEN`` environment variable is not set; 502 when the
            translation service fails.
    """
    needs_translation = tgt_lang != "en_XX"

    headers = None
    if needs_translation:
        # Fail fast, before the expensive transcription, if the credential
        # needed for the translation step is missing.
        token = os.environ.get(_HF_TOKEN_ENV_VAR)
        if not token:
            raise HTTPException(
                status_code=500,
                detail=(
                    f"Translation is not configured: set the "
                    f"{_HF_TOKEN_ENV_VAR} environment variable."
                ),
            )
        headers = {"Authorization": f"Bearer {token}"}

    # NOTE: decoding, transcription and the HTTP calls below are blocking and
    # run on the event loop, so requests are effectively handled one at a time.
    raw_bytes = await audio_file.read()
    samples, _ = librosa.load(io.BytesIO(raw_bytes), sr=16000)
    transcription = model.transcribe(samples, task="translate")
    english_text = transcription["text"]

    if not needs_translation:
        return english_text

    chunks = textwrap3.wrap(english_text, _CHUNK_WIDTH)
    # wrap() drops the whitespace at each break, so put a separator back when
    # joining; otherwise words at chunk boundaries are fused together.
    separator = "" if tgt_lang in _UNSPACED_TARGET_LANGS else " "
    return separator.join(
        _translate_chunk(chunk, tgt_lang, headers) for chunk in chunks
    )