Upload folder using huggingface_hub
Browse files- app.py +118 -0
- index.html +52 -0
- style.css +13 -0
app.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
Created By Lewis Kamau Kimaru
|
3 |
+
Sema fastapi backend
|
4 |
+
August 2023
|
5 |
+
'''
|
6 |
+
|
7 |
+
from fastapi import FastAPI, HTTPException, Request
|
8 |
+
from fastapi.middleware.cors import CORSMiddleware
|
9 |
+
from fastapi.responses import HTMLResponse
|
10 |
+
from flores200_codes import flores_codes
|
11 |
+
import gradio as gr
|
12 |
+
import ctranslate2
|
13 |
+
import sentencepiece as spm
|
14 |
+
import fasttext
|
15 |
+
import uvicorn
|
16 |
+
from pyngrok import ngrok
|
17 |
+
import nest_asyncio
|
18 |
+
import os
|
19 |
+
|
20 |
+
app = FastAPI()

# The ngrok authtoken is a credential and must not live in source control.
# It is read from the environment instead; any token that was previously
# hard-coded in this file should be treated as leaked and revoked.
_ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN")
if not _ngrok_auth_token:
    raise RuntimeError(
        "Set the NGROK_AUTHTOKEN environment variable before starting the API."
    )
ngrok.set_auth_token(_ngrok_auth_token)
|
28 |
+
|
29 |
+
|
30 |
+
# Swap fastText's eprint hook for a no-op so it prints nothing
# (presumably to silence the warning emitted by load_model -- confirm).
fasttext.FastText.eprint = lambda x: None

# Inference settings. Everything below is loaded once, at import time.
beam_size = 1  # change to a smaller value for faster inference
device = "cpu"  # or "cuda"

# All model files are resolved relative to the directory holding this file.
_base_dir = os.path.dirname(__file__)

# Language prediction model (fastText).
print("\nimporting Language Prediction model")
lang_model_file = "Sema/lid218e.bin"
lang_model_full_path = os.path.join(_base_dir, lang_model_file)
lang_model = fasttext.load_model(lang_model_full_path)

# SentencePiece model used to subword the source and desubword the output.
print("\nimporting SentencePiece model")
sp_model_file = "Sema/spm.model"
sp_model_full_path = os.path.join(_base_dir, sp_model_file)
sp = spm.SentencePieceProcessor()
sp.load(sp_model_full_path)

# CTranslate2 translation model.
print("\nimporting Translator model")
ct_model_file = "Sema/sematrans-3.3B"
ct_model_full_path = os.path.join(_base_dir, ct_model_file)
translator = ctranslate2.Translator(ct_model_full_path, device=device)

print('\nDone importing models\n')
|
57 |
+
|
58 |
+
|
59 |
+
def translate_text(userinput: str, target_lang: str):
    """Detect the language of ``userinput`` and translate it into ``target_lang``.

    Args:
        userinput: Text to translate. Surrounding whitespace is ignored.
        target_lang: Language token handed to the translator as the target
            prefix (for example ``"eng_Latn"``).

    Returns:
        A ``(source_lang, translations)`` tuple. ``source_lang`` is the top
        label predicted by the language model with ``__label__`` removed;
        ``translations`` is a one-element list holding the translated text.
    """
    source_sents = [userinput.strip()]
    target_prefix = [[target_lang] for _ in source_sents]

    # Predict the source language. fastText's predict() raises ValueError when
    # the text contains a newline, so line breaks are flattened to spaces for
    # detection only; the text sent to the translator is left untouched.
    detection_text = " ".join(source_sents[0].splitlines())
    predictions = lang_model.predict(detection_text, k=1)
    source_lang = predictions[0][0].replace('__label__', '')

    # Subword the source and wrap it as: <source language> tokens... </s>
    source_sents_subworded = sp.encode(source_sents, out_type=str)
    source_sents_subworded = [[source_lang] + sent + ["</s>"] for sent in source_sents_subworded]

    # Translate the source sentences
    translations = translator.translate_batch(
        source_sents_subworded,
        batch_type="tokens",
        max_batch_size=2024,
        beam_size=beam_size,
        target_prefix=target_prefix,
    )
    hypotheses = [translation[0]['tokens'] for translation in translations]

    # The hypothesis is expected to start with the forced target-language
    # token. Drop it as a token, and only when it is really there, instead of
    # slicing len(target_lang) characters off the decoded string: that left a
    # leading space and would eat real text if the token were ever missing.
    hypotheses = [
        tokens[1:] if tokens[:1] == [target_lang] else tokens
        for tokens in hypotheses
    ]

    # Desubword the target sentences
    translations_desubword = [sent.strip() for sent in sp.decode(hypotheses)]

    # Return the source language and the translated text
    return source_lang, translations_desubword
|
88 |
+
|
89 |
+
|
90 |
+
@app.get("/")
def read_root():
    """Return the welcome message served at the API root."""
    # Author name corrected ("Lewsi" -> "Lewis") to match the file header.
    return {"message": "Welcome to the Sema Translation API! \nThis API was created by Lewis Kamau Kimaru"}
|
93 |
+
|
94 |
+
|
95 |
+
@app.post("/translate/")
async def translate_endpoint(request: Request):
    """Translate the text carried in a JSON request body.

    The body must be a JSON object with two non-empty string fields:
    ``userinput`` (the text to translate) and ``target_lang`` (the target
    language token).

    Returns:
        A dict with ``source_language`` (the detected language label) and
        ``translated_text`` (the translation).

    Raises:
        HTTPException: 422 when the body is not valid JSON, is not a JSON
            object, or either field is missing, empty, or not a string.
    """
    # Malformed bodies are client errors; without this they surface as HTTP 500.
    try:
        data = await request.json()
    except ValueError as err:  # JSONDecodeError and UnicodeDecodeError
        raise HTTPException(status_code=422, detail="Request body must be valid JSON.") from err

    if not isinstance(data, dict):
        raise HTTPException(status_code=422, detail="Request body must be a JSON object.")

    userinput = data.get("userinput")
    target_lang = data.get("target_lang")
    print(f"\n Target Language; {target_lang}, User Input: {userinput}\n")

    if not userinput or not target_lang:
        raise HTTPException(status_code=422, detail="Both 'userinput' and 'target_lang' are required.")

    # translate_text calls str methods on these values, so reject other JSON types here.
    if not isinstance(userinput, str) or not isinstance(target_lang, str):
        raise HTTPException(status_code=422, detail="'userinput' and 'target_lang' must be strings.")

    source_lang, translated_text = translate_text(userinput, target_lang)
    print(f"\nsource_language: {source_lang}, Translated Text: {translated_text}\n\n")
    return {
        "source_language": source_lang,
        "translated_text": translated_text[0],
    }
|
111 |
+
|
112 |
+
# Port shared by the ngrok tunnel and the uvicorn server.
_port = 7860

# Open an ngrok tunnel to the local port and report its public address.
ngrok_tunnel = ngrok.connect(_port)
public_url = ngrok_tunnel.public_url
print('\nPublic URL✅:', public_url)

# NOTE(review): presumably needed so uvicorn can start inside an already
# running event loop (e.g. a notebook) -- confirm.
nest_asyncio.apply()

print("\nAPI starting .......\n")
uvicorn.run(app, port=_port)
|
index.html
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
<head>
|
4 |
+
<meta charset="utf-8">
|
5 |
+
<meta name="viewport" content="width=device-width">
|
6 |
+
<title>Sema Translator</title>
|
7 |
+
<link rel="stylesheet" href="style.css" />
|
8 |
+
<py-script src="modules/app.py"></py-script>
|
9 |
+
</head>
|
10 |
+
<body>
|
11 |
+
<div class="Header">
|
12 |
+
<h1>Sema Translator</h1>
|
13 |
+
<p>Unlock the power of global communication with Sema Translator, which seamlessly bridges language barriers. With support for over 200 languages, Sema Translator opens up a realm of possibilities for building truly global applications.</p>
|
14 |
+
|
15 |
+
</div>
|
16 |
+
|
17 |
+
<div class="LanguageDropdown">
|
18 |
+
<h2>Select a Language:</h2>
|
19 |
+
<select id="languageSelect">
|
20 |
+
<!-- Add options dynamically -->
|
21 |
+
</select>
|
22 |
+
</div>
|
23 |
+
|
24 |
+
<div class="instructions">
|
25 |
+
<p>Use the following Python code to access the API endpoint:</p>
|
26 |
+
<pre style="text-align: left;">
|
27 |
+
import requests
|
28 |
+
|
29 |
+
url = "{public_url}/translate/"
|
30 |
+
data = {
|
31 |
+
"userinput": "rũcinĩ rwega, niwokĩra wega?",
|
32 |
+
"target_lang": "eng_Latn",
|
33 |
+
}
|
34 |
+
|
35 |
+
response = requests.post(url, json=data)
|
36 |
+
result = response.json()
|
37 |
+
|
38 |
+
print(result)
|
39 |
+
|
40 |
+
source_language = result['source_language']
|
41 |
+
print("Source Language:", source_language)
|
42 |
+
|
43 |
+
translation = result['translated_text']
|
44 |
+
print("Translated text:", translation)
|
45 |
+
</pre>
|
46 |
+
</div>
|
47 |
+
|
48 |
+
<div class="footer">
|
49 |
+
<h1>Created by Lewis Kamau Kimaru</h1>
|
50 |
+
</div>
|
51 |
+
</body>
|
52 |
+
</html>
|
style.css
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Centered sections: page header, language picker and footer. */
.Header,
.LanguageDropdown,
.footer {
  text-align: center;
}

/* Green panel that holds the API usage example. */
.instructions {
  background-color: #056e33;
  padding: 20px;
}
|