|
|
|
import sentencepiece |
|
import torch |
|
import gradio as gr |
|
|
|
|
|
from transformers import MarianMTModel, AutoTokenizer, T5ForConditionalGeneration |
|
from deep_translator import GoogleTranslator |
|
|
|
# Lazily-populated model cache: the models are large, so load them once per
# process instead of re-instantiating them on every request (the original
# reloaded everything inside the handler on each call).
_MODEL_CACHE = {}


def _load_models():
    """Load and cache the bn->en translator and the T5 summarizer.

    Returns the cache dict with keys 'device', 'tok_bn_en', 'model_bn_en',
    'tok_t5' and 'model_t5'.  Models are moved to the GPU when available
    (the original computed `device` but never used it).
    """
    if not _MODEL_CACHE:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        _MODEL_CACHE['device'] = device
        _MODEL_CACHE['tok_bn_en'] = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-bn-en")
        _MODEL_CACHE['model_bn_en'] = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-bn-en").to(device)
        _MODEL_CACHE['tok_t5'] = AutoTokenizer.from_pretrained('t5-base')
        _MODEL_CACHE['model_t5'] = T5ForConditionalGeneration.from_pretrained('t5-base').to(device)
    return _MODEL_CACHE


def summarize(message):
    """Summarize a Bengali text and return the summary in Bengali.

    Pipeline: Bengali -> English (MarianMT), English summarization (T5),
    then English -> Bengali (Google Translate) for the final answer.

    Parameters
    ----------
    message : str
        Bengali input text.

    Returns
    -------
    str
        Bengali summary of the input.
    """
    cache = _load_models()
    device = cache['device']

    with torch.no_grad():
        # Step 1: translate Bengali -> English.  truncation=True keeps the
        # input within the model's 512-token positional limit.
        input_ids = cache['tok_bn_en'].encode(
            message, return_tensors='pt', truncation=True
        ).to(device)
        output_ids = cache['model_bn_en'].generate(input_ids)
        english_text = cache['tok_bn_en'].decode(output_ids[0], skip_special_tokens=True)

        # Step 2: summarize in English.  T5 is a multi-task model: the
        # "summarize: " task prefix is required for it to actually perform
        # summarization (the original omitted it).
        summary_input_ids = cache['tok_t5'].encode(
            'summarize: ' + english_text, return_tensors='pt', truncation=True
        ).to(device)
        summary_ids = cache['model_t5'].generate(
            summary_input_ids, max_length=50, num_beams=30, early_stopping=True
        )
        summary = cache['tok_t5'].decode(summary_ids[0], skip_special_tokens=True)

    # Step 3: translate the English summary back to Bengali for the user.
    summarized = GoogleTranslator(source='en', target='bn').translate(summary)
    return summarized
|
|
|
|
|
|
|
# Wire the summarizer into a simple text-in / text-out web UI and serve it.
# gr.Interface takes (fn, inputs, outputs) as its first three positionals.
iface = gr.Interface(summarize, "text", "text")

iface.launch()
|
|