# Run with: uvicorn app789:app --host 0.0.0.0 --port 8000 --reload
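# Upload an audio file via the web form served at "/"; Whisper transcribes it
# directly into English (task="translate"), and for non-English targets the text
# is then translated with facebook/mbart-large-50-many-to-many-mmt through the
# Hugging Face Inference API.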
from fastapi import FastAPI, UploadFile, Form
from fastapi.responses import HTMLResponse
import librosa
import io
import json
import os
import requests
import textwrap3
import whisper

# Load the Whisper model once at module import so it is reused across requests.
model = whisper.load_model("medium")
app = FastAPI()
# from faster_whisper import WhisperModel
# model_size = "medium"
# ts_model = WhisperModel(model_size, device="cpu", compute_type="int8")

@app.get("/")
def read_root():
    # Simple upload form; the selected <option> value is posted as "tgt_lang".
    html_form = """
    <html>
    <body>
    <h2>Audio Transcription</h2>
    <form action="/transcribe" method="post" enctype="multipart/form-data">
        <label for="audio_file">Upload an audio file (MP3 or WAV):</label>
        <input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
        <label for="language_select">Select Target Language:</label>
        <select id="language_select" name="tgt_lang">
            <option value="fr_XX">French</option>
            <option value="es_XX">Spanish</option>
            <option value="de_DE">German</option>
            <option value="hi_IN">Hindi</option>
            <option value="en_XX">English</option>
            <option value="ja_XX">Japanese</option>
            <option value="ne_NP">Nepali</option>
            <option value="zh_CN">Chinese</option>
            <option value="pt_XX">Portuguese</option>
            <!-- Add more language options here -->
        </select><br><br>
        <input type="submit" value="Transcribe">
    </form>
    </body>
    </html>
    """
    return HTMLResponse(content=html_form, status_code=200)

@app.post("/transcribe")
async def transcribe_audio(audio_file: UploadFile, tgt_lang: str = Form(...)):
    # Decode the upload to a 16 kHz mono waveform, the sample rate Whisper expects.
    audio_bytes = await audio_file.read()
    audio_data, _ = librosa.load(io.BytesIO(audio_bytes), sr=16000)

    # Whisper's "translate" task transcribes the audio directly into English.
    result = model.transcribe(audio_data, task="translate")
    transcribed_text = result['text']

    if tgt_lang == 'en_XX':
        return transcribed_text

    # Split the English text into ~100-character chunks so each translation
    # request stays within the model's input limit.
    chunks = textwrap3.wrap(transcribed_text, 100)

    # Alternative faster-whisper pipeline (kept for reference):
    # segments, _ = ts_model.transcribe(audio_data, task="translate")
    # lst = []
    # for segment in segments:
    #     lst.append(segment.text)

    # Read the Hugging Face API token from the environment instead of hard-coding it.
    headers = {"Authorization": f"Bearer {os.environ.get('HF_API_TOKEN', '')}"}
    API_URL = "https://api-inference.huggingface.co/pipeline/translation/facebook/mbart-large-50-many-to-many-mmt"

    def query(payload):
        data = json.dumps(payload)
        response = requests.request("POST", API_URL, headers=headers, data=data)
        return json.loads(response.content.decode("utf-8"))

    # Translate each chunk from English to the requested language with mBART-50
    # and stitch the pieces back together.
    translated_text = ''
    for i in chunks:
        result = query({"inputs": i, "parameters": {"src_lang": "en_XX", "tgt_lang": tgt_lang}})
        translated_text = translated_text + result[0]['translation_text']
    return translated_text
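

# Optional local entry point: a minimal sketch, assuming uvicorn is installed
# (it is referenced in the run command at the top of this file). Running the
# module directly serves the same app on port 8000.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)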