|
''' |
|
Created By Lewis Kamau Kimaru |
|
Sema fastapi backend |
|
August 2023 |
|
''' |
|
|
|
from fastapi import FastAPI, HTTPException, Request |
|
from fastapi.middleware.cors import CORSMiddleware |
|
from fastapi.responses import HTMLResponse |
|
import gradio as gr |
|
import ctranslate2 |
|
import sentencepiece as spm |
|
import fasttext |
|
import uvicorn |
|
from pyngrok import ngrok |
|
import nest_asyncio |
|
import os |
|
|
|
# ASGI application object; the route decorators further down register on it.
app = FastAPI()

# SECURITY: an ngrok credential is committed to the source in plain text.
# The NGROK_AUTHTOKEN environment variable now takes precedence so the token
# can be supplied (and rotated) without editing this file. The literal is kept
# only as a fallback so existing deployments behave exactly as before; it
# should be revoked and deleted once the environment variable is in place.
ngrok.set_auth_token(
    os.environ.get("NGROK_AUTHTOKEN")
    or "2UGQqzZoI3bx7SSk8H4wuFC3iaC_2WniWyNAsW5fd2rFyKVq1"
)

# Replace fastText's eprint helper with a no-op so whatever it would print
# through that helper is discarded.
fasttext.FastText.eprint = lambda x: None

# Decoding configuration shared by the translation code below.
beam_size = 1  # passed to translator.translate_batch as beam_size
device = "cpu"  # device argument given to ctranslate2.Translator
|
|
|
|
|
# Every model path is resolved relative to the directory holding this file,
# so loading does not depend on the current working directory.
_module_dir = os.path.dirname(__file__)

# --- Language identification (fastText) -------------------------------------
print("\nimporting Language Prediction model")
lang_model_file = "modules/lid218e.bin"
lang_model_full_path = os.path.join(_module_dir, lang_model_file)
lang_model = fasttext.load_model(lang_model_full_path)

# --- Subword tokenizer (SentencePiece) ---------------------------------------
print("\nimporting SentencePiece model")
sp_model_file = "modules/spm.model"
sp_model_full_path = os.path.join(_module_dir, sp_model_file)
sp = spm.SentencePieceProcessor()
sp.load(sp_model_full_path)

# --- Translation model (CTranslate2) -----------------------------------------
print("\nimporting Translator model")
ct_model_file = "modules/sematrans-3.3B"
ct_model_full_path = os.path.join(_module_dir, ct_model_file)
translator = ctranslate2.Translator(ct_model_full_path, device)

print("\nDone importing models\n")
|
|
|
|
|
def translate_text(userinput: str, target_lang: str):
    """Detect the language of ``userinput`` and translate it into ``target_lang``.

    Args:
        userinput: Text to translate. Leading/trailing whitespace is ignored.
        target_lang: Token forced as the first target token via
            ``target_prefix`` (presumably a language code such as
            ``eng_Latn`` -- confirm against the translation model's
            vocabulary).

    Returns:
        tuple: ``(source_lang, translations)`` where ``source_lang`` is the
        detected language label with the ``__label__`` prefix removed and
        ``translations`` is a one-element list containing the translated text.
    """
    text = userinput.strip()

    # fastText's predict() raises ValueError for any string containing "\n",
    # so multi-line input used to crash the request. Line breaks are replaced
    # with spaces for language detection only; the text that is translated is
    # left untouched.
    detection_text = text.replace("\n", " ")
    labels, _scores = lang_model.predict(detection_text, k=1)
    source_lang = labels[0].replace("__label__", "")

    # Subword-tokenize, then wrap the pieces as
    # <source language> ... </s> before handing them to the translator.
    subworded = sp.encode([text], out_type=str)
    source_tokens = [[source_lang] + pieces + ["</s>"] for pieces in subworded]
    target_prefix = [[target_lang]] * len(source_tokens)

    results = translator.translate_batch(
        source_tokens,
        batch_type="tokens",
        max_batch_size=2024,
        beam_size=beam_size,
        target_prefix=target_prefix,
    )
    # Keep the tokens of the first hypothesis for each input sentence.
    best_tokens = [result[0]["tokens"] for result in results]

    decoded = sp.decode(best_tokens)
    # The decoded string begins with the forced target_lang prefix. Drop it by
    # length and also strip the whitespace left around the remainder, so the
    # caller does not receive a translation with a leading space.
    translations = [sent[len(target_lang):].strip() for sent in decoded]

    return source_lang, translations
|
|
|
|
|
@app.get("/")
def read_root():
    """Return the welcome payload served at the API root.

    Returns:
        dict: A single ``"message"`` entry holding the greeting and the
        author credit.
    """
    # Author name corrected from "Lewsi" so it matches the credit in the
    # file header.
    return {"message": "Welcome to the Sema Translation API! \nThis API was created by Lewis Kamau Kimaru"}
|
|
|
|
|
@app.post("/translate/")
async def translate_endpoint(request: Request):
    """Translate the text supplied in a JSON request body.

    The body must be a JSON object with two non-empty string fields:
    ``userinput`` (the text to translate) and ``target_lang`` (passed to
    ``translate_text`` as the target language).

    Returns:
        dict: ``{"source_language": ..., "translated_text": ...}`` with the
        detected source language and the first translation returned by
        ``translate_text``.

    Raises:
        HTTPException: 422 when the body is not valid JSON, is not a JSON
            object, or when either field is missing, empty, or not a string.
    """
    try:
        data = await request.json()
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError both subclass
        # ValueError; report a client error instead of an unhandled 500.
        raise HTTPException(
            status_code=422, detail="Request body must be valid JSON."
        ) from exc

    # A JSON array/string/number has no .get(); reject it explicitly.
    if not isinstance(data, dict):
        raise HTTPException(
            status_code=422, detail="Request body must be a JSON object."
        )

    userinput = data.get("userinput")
    target_lang = data.get("target_lang")
    print(f"\n Target Language; {target_lang}, User Input: {userinput}\n")

    if not userinput or not target_lang:
        raise HTTPException(status_code=422, detail="Both 'userinput' and 'target_lang' are required.")

    # translate_text calls str methods on both values; a truthy non-string
    # (e.g. a number or list) would otherwise fail with an AttributeError.
    if not isinstance(userinput, str) or not isinstance(target_lang, str):
        raise HTTPException(
            status_code=422,
            detail="'userinput' and 'target_lang' must be strings.",
        )

    source_lang, translated_text = translate_text(userinput, target_lang)
    print(f"\nsource_language: {source_lang}, Translated Text: {translated_text}\n\n")

    return {
        "source_language": source_lang,
        "translated_text": translated_text[0],
    }
|
|
|
# Open an ngrok tunnel to local port 7860 and report the public address.
# NOTE(review): this runs at import time; presumably the server is started on
# the same port elsewhere -- confirm.
ngrok_tunnel = ngrok.connect(7860)
public_url = ngrok_tunnel.public_url
print('\nPublic URL✅:', public_url)

# nest_asyncio patches asyncio so an event loop can be re-entered; presumably
# required because the server is launched from an environment that already
# runs a loop (e.g. a notebook) -- confirm.
nest_asyncio.apply()

print("\nAPI starting .......\n")
|
|
|
|