File size: 2,733 Bytes
00bfa33 f50408f 00bfa33 c3e5a3b 463444e 93d168d 463444e ccf30c7 463444e 06d2814 93d168d 06d2814 463444e 00bfa33 06d2814 00bfa33 06d2814 463444e 00bfa33 48ff56c 93d168d 00bfa33 93d168d 48ff56c 00bfa33 48ff56c 9003587 48ff56c 00bfa33 9003587 48ff56c 00bfa33 48ff56c 9003587 48ff56c 463444e 93d168d 06d2814 463444e 9003587 463444e 00bfa33 463444e 93d168d 00bfa33 9003587 00bfa33 463444e e37abe5 463444e 9003587 463444e 9003587 463444e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import os
import torch
import gradio as gr
import time
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from flores200_codes import flores_codes
def load_models():
    """Load every configured checkpoint together with its tokenizer.

    Returns:
        dict: Maps each short model name to a dict holding the loaded
        objects under the keys ``"model"`` and ``"tokenizer"``.
    """
    # Short name used inside this app -> identifier handed to from_pretrained().
    checkpoints = {
        "nllb-distilled-1.3B": "facebook/nllb-200-distilled-1.3B",
    }

    loaded = {}
    for alias, checkpoint in checkpoints.items():
        print("\tLoading model:", alias)
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
        tok = AutoTokenizer.from_pretrained(checkpoint)
        loaded[alias] = dict(model=seq2seq, tokenizer=tok)
    return loaded
# Load every model and tokenizer once, when the module is first executed, so
# translate_text() can reuse them on each call instead of reloading them.
model_dict = load_models()
# Translate text using preloaded models and tokenizers
def translate_text(source, target, text):
    """Translate ``text`` from one language to another with the preloaded model.

    Args:
        source: Key into ``flores_codes`` naming the language of ``text``.
        target: Key into ``flores_codes`` naming the language to translate to.
        text: The text to translate.

    Returns:
        dict: ``"inference_time"`` (seconds spent building the pipeline and
        running it), ``"source"`` and ``"target"`` (the values looked up in
        ``flores_codes``), and ``"result"`` (the translated text).

    Raises:
        KeyError: If the model is not present in ``model_dict``, or if
            ``source`` / ``target`` is not a key of ``flores_codes``.
    """
    # Must be a key that load_models() registers. Only the 1.3B checkpoint is
    # loaded; asking for any other name always ends in the KeyError below.
    model_name = "nllb-distilled-1.3B"

    entry = model_dict.get(model_name)
    if entry is None or entry["model"] is None:
        raise KeyError(f"Model '{model_name}' not found in model_dict")

    model = entry["model"]
    tokenizer = entry["tokenizer"]

    start_time = time.time()

    # Map the names chosen in the UI to the codes passed as src_lang/tgt_lang.
    source = flores_codes[source]
    target = flores_codes[target]

    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=source,
        tgt_lang=target,
    )
    output = translator(text, max_length=400)

    end_time = time.time()

    # The pipeline returns a list with one dict per input; a single string
    # was passed in, so only the first entry is read.
    return {
        "inference_time": end_time - start_time,
        "source": source,
        "target": target,
        "result": output[0]["translation_text"],
    }
if __name__ == "__main__":
    print("\tInitializing models")

    # The dropdowns offer every language name known to flores_codes.
    languages = [*flores_codes.keys()]

    source_picker = gr.inputs.Dropdown(languages, default="English", label="Source")
    target_picker = gr.inputs.Dropdown(languages, default="Nepali", label="Target")
    text_box = gr.inputs.Textbox(lines=5, label="Input text")
    json_view = gr.outputs.JSON()

    page_title = "The Master Betters Translator"
    blurb = "This is a beta version of The Master Betters Translator that utilizes pre-trained language models for translation. To use this app you need to have chosen the source and target language with your input text to get the output."

    # One example row, in the same order as the inputs: source, target, text.
    sample_rows = [["English", "Nepali", "Hello, how are you?"]]

    app = gr.Interface(
        translate_text,
        [source_picker, target_picker, text_box],
        json_view,
        title=page_title,
        description=blurb,
        examples=sample_rows,
        examples_per_page=50,
    )
    app.launch()
|