kamau1 committed on
Commit
5ed5b30
·
1 Parent(s): 7c7b8b8

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. app.py +118 -0
  2. index.html +52 -0
  3. style.css +13 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Created By Lewis Kamau Kimaru
3
+ Sema fastapi backend
4
+ August 2023
5
+ '''
6
+
7
+ from fastapi import FastAPI, HTTPException, Request
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.responses import HTMLResponse
10
+ from flores200_codes import flores_codes
11
+ import gradio as gr
12
+ import ctranslate2
13
+ import sentencepiece as spm
14
+ import fasttext
15
+ import uvicorn
16
+ from pyngrok import ngrok
17
+ import nest_asyncio
18
+ import os
19
+
20
app = FastAPI()

# The ngrok authtoken is a credential and must never be committed to source
# control, so it is read from the NGROK_AUTHTOKEN environment variable.
# Any token that was previously hard-coded in this file should be revoked.
_ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN")
if _ngrok_auth_token:
    ngrok.set_auth_token(_ngrok_auth_token)
else:
    print("\nNGROK_AUTHTOKEN is not set; skipping ngrok.set_auth_token()\n")
28
+
29
+
30
# Replace fastText's eprint hook with a no-op so it prints nothing.
fasttext.FastText.eprint = lambda x: None

# Models are loaded once, at import time, and shared by every request.
beam_size = 1  # beam width handed to translate_batch
device = "cpu"  # or "cuda"

# All model files live in the "Sema" folder next to this module.
_MODULE_DIR = os.path.dirname(__file__)

# Language-identification model (fastText).
print("\nimporting Language Prediction model")
lang_model_file = "Sema/lid218e.bin"
lang_model_full_path = os.path.join(_MODULE_DIR, lang_model_file)
lang_model = fasttext.load_model(lang_model_full_path)

# SentencePiece model used for sub-word encoding and decoding.
print("\nimporting SentencePiece model")
sp_model_file = "Sema/spm.model"
sp_model_full_path = os.path.join(_MODULE_DIR, sp_model_file)
sp = spm.SentencePieceProcessor()
sp.load(sp_model_full_path)

# CTranslate2 translation model.
print("\nimporting Translator model")
ct_model_file = "Sema/sematrans-3.3B"
ct_model_full_path = os.path.join(_MODULE_DIR, ct_model_file)
translator = ctranslate2.Translator(ct_model_full_path, device)

print('\nDone importing models\n')
57
+
58
+
59
def translate_text(userinput: str, target_lang: str):
    """Detect the language of *userinput* and translate it into *target_lang*.

    Args:
        userinput: Text to translate. Surrounding whitespace is ignored.
        target_lang: Target-language token, forced as the first decoded
            token via ``target_prefix`` (for example ``"eng_Latn"``).

    Returns:
        A ``(source_lang, translations)`` tuple. ``source_lang`` is the label
        predicted by the language-identification model with its
        ``__label__`` prefix removed; ``translations`` is a one-element list
        holding the translated text.
    """
    source_sents = [userinput.strip()]
    target_prefix = [[target_lang]] * len(source_sents)

    # fastText's predict() raises ValueError on text containing a newline,
    # so language detection runs on a single-line copy of the input.
    detection_text = " ".join(source_sents[0].splitlines())
    predictions = lang_model.predict(detection_text, k=1)
    source_lang = predictions[0][0].replace("__label__", "")

    # Subword the source sentences and wrap them in the tokens the model
    # expects: source-language tag first, end-of-sentence marker last.
    source_sents_subworded = sp.encode(source_sents, out_type=str)
    source_sents_subworded = [
        [source_lang] + sent + ["</s>"] for sent in source_sents_subworded
    ]

    # Translate the source sentences
    translations = translator.translate_batch(
        source_sents_subworded,
        batch_type="tokens",
        max_batch_size=2024,
        beam_size=beam_size,
        target_prefix=target_prefix,
    )
    translations = [translation[0]["tokens"] for translation in translations]

    # Drop the forced target-language token before decoding. Removing it at
    # token level (instead of slicing characters off the decoded string)
    # avoids leaving stray leading whitespace in the result.
    translations = [
        tokens[1:] if tokens and tokens[0] == target_lang else tokens
        for tokens in translations
    ]

    # Desubword the target sentences
    translations_desubword = [sent.strip() for sent in sp.decode(translations)]

    # Return the source language and the translated text
    return source_lang, translations_desubword
88
+
89
+
90
@app.get("/")
def read_root():
    """Return the welcome message served at the API root."""
    return {"message": "Welcome to the Sema Translation API! \nThis API was created by Lewis Kamau Kimaru"}
93
+
94
+
95
@app.post("/translate/")
async def translate_endpoint(request: Request):
    """Translate the text supplied in a JSON request body.

    The body must be a JSON object with two non-empty string fields:
    ``userinput`` (the text to translate) and ``target_lang`` (the
    target-language token).

    Returns:
        A dict with ``source_language`` (the detected language label) and
        ``translated_text`` (the translation as a string).

    Raises:
        HTTPException: 422 when the body is not valid JSON, is not a JSON
            object, or when either field is missing, empty or not a string.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors;
        # report them as a client error instead of an unhandled 500.
        raise HTTPException(status_code=422, detail="Request body must be valid JSON.") from None

    if not isinstance(data, dict):
        raise HTTPException(status_code=422, detail="Request body must be a JSON object.")

    userinput = data.get("userinput")
    target_lang = data.get("target_lang")
    print(f"\n Target Language; {target_lang}, User Input: {userinput}\n")

    # Both fields must be strings with visible content; anything else would
    # fail later inside translate_text with an unhelpful server error.
    fields_ok = all(
        isinstance(value, str) and value.strip()
        for value in (userinput, target_lang)
    )
    if not fields_ok:
        raise HTTPException(status_code=422, detail="Both 'userinput' and 'target_lang' are required.")

    source_lang, translated_text = translate_text(userinput, target_lang)
    print(f"\nsource_language: {source_lang}, Translated Text: {translated_text}\n\n")
    return {
        "source_language": source_lang,
        "translated_text": translated_text[0],
    }
111
+
112
# Port shared by the ngrok tunnel and the uvicorn server.
SERVER_PORT = 7860

# Open an ngrok tunnel to the local port and report its public address.
ngrok_tunnel = ngrok.connect(SERVER_PORT)
public_url = ngrok_tunnel.public_url
print('\nPublic URL✅:', public_url)

# Patch asyncio via nest_asyncio before starting the server.
nest_asyncio.apply()

print("\nAPI starting .......\n")
uvicorn.run(app, port=SERVER_PORT)
index.html ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width">
6
+ <title>Sema Translator</title>
7
+ <link rel="stylesheet" href="style.css" />
8
+ <py-script src="modules/app.py"></py-script>
9
+ </head>
10
+ <body>
11
+ <div class="Header">
12
+ <h1>Sema Translator</h1>
13
+ <p>Unlock the Power of Global Communication with Sema Translator! Seamlessly bridging language barriers. With support for over 200 languages, Sema Translator opens up a realm of possibilities for building truly global applications.</p>
14
+
15
+ </div>
16
+
17
+ <div class="LanguageDropdown">
18
+ <h2>Select a Language:</h2>
19
+ <select id="languageSelect">
20
+ <!-- Add options dynamically -->
21
+ </select>
22
+ </div>
23
+
24
+ <div class="instructions">
25
+ <p>Use the following python code to access the api endpoint</p>
26
+ <pre style="text-align: left;">
27
+ import requests
28
+
29
+ url = "{public_url}/translate/"
30
+ data = {
31
+ "userinput": "rũcinĩ rwega, niwokĩra wega?",
32
+ "target_lang": "eng_Latn",
33
+ }
34
+
35
+ response = requests.post(url, json=data)
36
+ result = response.json()
37
+
38
+ print(result)
39
+
40
+ source_language = result['source_language']
41
+ print("Source Language:", source_language)
42
+
43
+ translation = result['translated_text']
44
+ print("Translated text:", translation)
45
+ </pre>
46
+ </div>
47
+
48
+ <div class="footer">
49
+ <h1>Created by Lewis Kamau Kimaru</h1>
50
+ </div>
51
+ </body>
52
+ </html>
style.css ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .Header {
2
+ text-align: center;
3
+ }
4
+ .LanguageDropdown {
5
+ text-align: center;
6
+ }
7
+ .instructions {
8
+ background-color: #056e33;
9
+ padding: 20px;
10
+ }
11
+ .footer {
12
+ text-align: center;
13
+ }