File size: 2,621 Bytes
463444e
 
 
a9c115a
c3e5a3b
463444e
 
06d2814
a9c115a
463444e
c3e5a3b
463444e
 
 
 
 
06d2814
a9c115a
 
06d2814
 
 
 
463444e
 
 
06d2814
 
 
463444e
 
06d2814
 
463444e
06d2814
 
463444e
 
 
 
 
06d2814
 
463444e
06d2814
463444e
 
 
06d2814
463444e
06d2814
 
 
463444e
06d2814
463444e
a9c115a
06d2814
463444e
06d2814
a9c115a
463444e
 
 
06d2814
 
 
463444e
 
 
 
9e6f72f
463444e
06d2814
b0ef2d7
463444e
06d2814
463444e
 
a9c115a
 
 
463444e
06d2814
463444e
 
 
a9c115a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import torch
import gradio as gr
import time
import asyncio
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from flores200_codes import flores_codes

# Load models and tokenizers once during initialization
async def load_models():
    """Load every configured translation model and its tokenizer.

    The blocking HuggingFace ``from_pretrained`` downloads run in worker
    threads via ``asyncio.to_thread`` so the event loop stays responsive.

    Returns:
        dict: short model name -> {"model": ..., "tokenizer": ...}.
    """
    model_sources = {
        "nllb-distilled-600M": "facebook/nllb-200-distilled-600M",
    }

    loaded = {}
    for short_name, hub_id in model_sources.items():
        print("\tLoading model:", short_name)
        # Off-load the blocking pretrained-weights loading to threads.
        seq2seq = await asyncio.to_thread(AutoModelForSeq2SeqLM.from_pretrained, hub_id)
        tok = await asyncio.to_thread(AutoTokenizer.from_pretrained, hub_id)
        loaded[short_name] = {"model": seq2seq, "tokenizer": tok}

    return loaded

# Translate text using preloaded models and tokenizers
def translate_text(source_lang, target_lang, input_text, model_dict):
    """Translate *input_text* from *source_lang* to *target_lang*.

    Args:
        source_lang: Human-readable source language name (key of ``flores_codes``).
        target_lang: Human-readable target language name (key of ``flores_codes``).
        input_text: The text to translate.
        model_dict: Mapping produced by ``load_models()``.

    Returns:
        dict with ``inference_time`` (seconds), ``source``, ``target``
        and ``result`` (the translated text) keys.

    Raises:
        KeyError: if a language name is not present in ``flores_codes``.
    """
    model_name = "nllb-distilled-600M"

    start_time = time.time()
    source_code = flores_codes[source_lang]
    target_code = flores_codes[target_lang]

    entry = model_dict[model_name]

    # Constructing a transformers pipeline is expensive; the original code
    # rebuilt it on every request. Cache one pipeline per (source, target)
    # pair inside the model entry so repeated requests reuse it.
    pipelines = entry.setdefault("pipelines", {})
    cache_key = (source_code, target_code)
    translator = pipelines.get(cache_key)
    if translator is None:
        translator = pipeline(
            "translation",
            model=entry["model"],
            tokenizer=entry["tokenizer"],
            src_lang=source_code,
            tgt_lang=target_code,
        )
        pipelines[cache_key] = translator

    translated_output = translator(input_text, max_length=400)

    end_time = time.time()

    return {
        "inference_time": end_time - start_time,
        "source": source_lang,
        "target": target_lang,
        "result": translated_output[0]["translation_text"],
    }

async def main():
    """Load the models, build the Gradio UI, and serve the translator."""
    print("\tInitializing models")

    # Load models and tokenizers
    model_dict = await load_models()

    # BUG FIX: Gradio supplies only the three UI inputs, but translate_text
    # takes four parameters — passing it directly would raise a
    # missing-argument TypeError on every request. Bind model_dict here so
    # each call receives the preloaded models.
    def run_translation(source_lang, target_lang, input_text):
        return translate_text(source_lang, target_lang, input_text, model_dict)

    lang_codes = list(flores_codes.keys())
    inputs = [
        gr.inputs.Dropdown(lang_codes, default="English", label="Source Language"),
        gr.inputs.Dropdown(lang_codes, default="Nepali", label="Target Language"),
        gr.inputs.Textbox(lines=5, label="Input Text"),
    ]

    outputs = gr.outputs.JSON()

    title = "The Master Betters Translator"

    app_description = (
        "This is a beta version of The Master Betters Translator that utilizes pre-trained language models for translation."
    )
    examples = [["English", "Nepali", "Hello, how are you?"]]

    gr.Interface(
        run_translation,
        inputs,
        outputs,
        title=title,
        description=app_description,
        examples=examples,
        examples_per_page=50,
    ).launch()

if __name__ == "__main__":
    # Script entry point: start the asyncio event loop and run the app.
    asyncio.run(main())