File size: 3,389 Bytes
04cf650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Run with: uvicorn app789:app --host 0.0.0.0 --port 8000 --reload

import io
import json
import os

import librosa
import requests
import textwrap3
import whisper
from fastapi import FastAPI, Form, HTTPException, UploadFile
from fastapi.responses import HTMLResponse

# Load the Whisper "medium" checkpoint once at import time; this module-level
# `model` is what the /transcribe handler calls for every request.
model = whisper.load_model("medium")

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

# Disabled alternative backend (faster-whisper on CPU with int8), kept for reference:
# from faster_whisper import WhisperModel
# model_size = "medium"
# ts_model = WhisperModel(model_size, device="cpu", compute_type="int8")

@app.get("/")
def read_root():
    """Serve the HTML upload form for the transcription service.

    The form posts multipart data to ``/transcribe`` with two fields:
    ``audio_file`` (the uploaded file) and ``tgt_lang`` (the language code
    chosen in the drop-down).

    Returns:
        HTMLResponse: the form page, with status code 200.
    """
    # The <select> is itself named "tgt_lang", so the browser submits the
    # selected option directly. The form previously also contained a hidden
    # input with the same name, kept in sync by a script; that sent the field
    # twice and, without JavaScript, the hidden copy stayed at "fr_XX" and
    # contradicted the user's selection. Both have been removed.
    html_form = """
    <html>
        <body>
            <h2>Audio Transcription</h2>
            <form action="/transcribe" method="post" enctype="multipart/form-data">
                <label for="audio_file">Upload an audio file (MP3 or WAV):</label>
                <input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
                <label for="language_select">Select Target Language:</label>
                <select id="language_select" name="tgt_lang">
                    <option value="fr_XX">French</option>
                    <option value="es_XX">Spanish</option>
                    <option value="de_DE">German</option>
                    <option value="hi_IN">Hindi</option>
                    <option value="en_XX">English</option>
                    <option value="ja_XX">Japanese</option>
                    <option value="ne_NP">Nepali</option>
                    <option value="zh_CN">Chinese</option>
                    <option value="pt_XX">Portuguese</option>
                    <!-- Add more language options here -->
                </select><br><br>
                <input type="submit" value="Transcribe">
            </form>
        </body>
    </html>
    """
    return HTMLResponse(content=html_form, status_code=200)

# Hugging Face hosted-inference endpoint for the mBART-50 many-to-many model.
HF_TRANSLATION_URL = (
    "https://api-inference.huggingface.co/pipeline/translation/"
    "facebook/mbart-large-50-many-to-many-mmt"
)
# Name of the environment variable that must hold the Hugging Face API token.
# SECURITY: a token used to be hard-coded in this file; treat that token as
# leaked and revoke it.
HF_TOKEN_ENV_VAR = "HF_TOKEN"
# Seconds to wait for one translation request before giving up.
HF_REQUEST_TIMEOUT_S = 60
# Maximum characters per translation request; chunks break on whitespace.
TRANSLATION_CHUNK_WIDTH = 100


def _translate_chunk(text, tgt_lang, token):
    """Translate one chunk of English text through the Hugging Face API.

    Args:
        text: English text to translate.
        tgt_lang: mBART language code of the target language (e.g. "fr_XX").
        token: Hugging Face API token sent as a Bearer credential.

    Returns:
        The ``translation_text`` of the first item in the API response.

    Raises:
        HTTPException: status 502 when the request fails, times out, returns
            a non-2xx status, or the response body is not in the expected
            ``[{"translation_text": ...}]`` shape (for example an
            ``{"error": ...}`` object).
    """
    payload = {
        "inputs": text,
        "parameters": {"src_lang": "en_XX", "tgt_lang": tgt_lang},
    }
    headers = {"Authorization": f"Bearer {token}"}
    try:
        response = requests.post(
            HF_TRANSLATION_URL,
            headers=headers,
            json=payload,
            timeout=HF_REQUEST_TIMEOUT_S,
        )
        response.raise_for_status()
        return response.json()[0]["translation_text"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
        raise HTTPException(
            status_code=502,
            detail="Translation service request failed",
        ) from err


@app.post("/transcribe")
async def transcribe_audio(audio_file: UploadFile, tgt_lang: str = Form(...)):
    """Transcribe an uploaded audio file and return it in the target language.

    The audio is run through the module-level Whisper ``model`` with
    ``task="translate"``. For ``tgt_lang == "en_XX"`` that text is returned
    as-is; otherwise it is split into chunks and each chunk is translated
    from ``en_XX`` to ``tgt_lang`` through the Hugging Face API.

    Args:
        audio_file: the uploaded audio (multipart form field ``audio_file``).
        tgt_lang: mBART language code of the desired output language
            (multipart form field ``tgt_lang``).

    Returns:
        The resulting text as a plain string.

    Raises:
        HTTPException: 400 if the upload is empty; 500 if a translation is
            requested but no API token is configured; 502 if the translation
            service call fails.
    """
    raw_bytes = await audio_file.read()
    if not raw_bytes:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty")

    needs_translation = tgt_lang != "en_XX"
    token = None
    if needs_translation:
        # Fail fast on missing configuration, before the expensive transcription.
        token = os.environ.get(HF_TOKEN_ENV_VAR)
        if not token:
            raise HTTPException(
                status_code=500,
                detail=f"Translation is not configured: set {HF_TOKEN_ENV_VAR}",
            )

    # NOTE(review): model.transcribe and the HTTP calls below are blocking and
    # run directly inside this async handler; consider off-loading them to a
    # thread pool if concurrent requests matter.
    samples, _ = librosa.load(io.BytesIO(raw_bytes), sr=16000)
    transcription = model.transcribe(samples, task="translate")
    english_text = transcription["text"]

    if not needs_translation:
        return english_text

    chunks = textwrap3.wrap(english_text, TRANSLATION_CHUNK_WIDTH)
    # wrap() drops the whitespace at each break, so the translated chunks are
    # re-joined with a space; plain concatenation glued the words at every
    # chunk boundary together.
    return " ".join(_translate_chunk(chunk, tgt_lang, token) for chunk in chunks)