nafisehNik
commited on
Commit
•
1f9f283
1
Parent(s):
ef1e772
Update README.md
Browse files
README.md
CHANGED
@@ -29,4 +29,49 @@ widget:
|
|
29 |
کود گوگرد بنتونیتی تا پنج سال آینده به ۱۰۰ هزار تن در سال برسد.
|
30 |
example_title: Example 1
|
31 |
|
32 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
کود گوگرد بنتونیتی تا پنج سال آینده به ۱۰۰ هزار تن در سال برسد.
|
30 |
example_title: Example 1
|
31 |
|
32 |
+
---
|
33 |
+
|
34 |
+
|
35 |
+
# mT5 Persian Summary
|
36 |
+
|
37 |
+
This model generates summaries of Persian text. It has been fine-tuned on a wide range of Persian news data, including [BBC news](https://huggingface.co/datasets/csebuetnlp/xlsum) and [pn_summary](https://huggingface.co/datasets/pn_summary).
|
38 |
+
|
39 |
+
## Usage
|
40 |
+
|
41 |
+
```python
|
42 |
+
from transformers import AutoModelForSeq2SeqLM, MT5Tokenizer
|
43 |
+
|
44 |
+
model = AutoModelForSeq2SeqLM.from_pretrained('nafisehNik/mt5-persian-summary')
|
45 |
+
|
46 |
+
tokenizer = MT5Tokenizer.from_pretrained("nafisehNik/mt5-persian-summary")
|
47 |
+
|
48 |
+
|
49 |
+
# method for summary generation; uses the passed-in model and the global tokenizer
|
50 |
+
def generate_summary(model, abstract, num_beams = 2, repetition_penalty = 1.0,
|
51 |
+
length_penalty = 2.0, early_stopping = True, max_output_length = 120):
|
52 |
+
source_encoding=tokenizer(abstract, max_length=1000, padding="max_length", truncation=True, return_attention_mask=True, add_special_tokens=True, return_tensors="pt")
|
53 |
+
|
54 |
+
generated_ids=model.generate(
|
55 |
+
input_ids=source_encoding["input_ids"],
|
56 |
+
attention_mask=source_encoding["attention_mask"],
|
57 |
+
num_beams=num_beams,
|
58 |
+
max_length=max_output_length,
|
59 |
+
repetition_penalty=repetition_penalty,
|
60 |
+
length_penalty=length_penalty,
|
61 |
+
early_stopping=early_stopping,
|
62 |
+
use_cache=True
|
63 |
+
)
|
64 |
+
|
65 |
+
preds=[tokenizer.decode(gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True)
|
66 |
+
for gen_id in generated_ids]
|
67 |
+
|
68 |
+
return "".join(preds)
|
69 |
+
|
70 |
+
text = "YOUR INPUT TEXT"
|
71 |
+
result = generate_summary(model=model, abstract=text, num_beams=2, max_output_length=120)
|
72 |
+
```
|
73 |
+
|
74 |
+
|
75 |
+
## Citation
|
76 |
+
|
77 |
+
If you find this model useful, please cite it by linking to this Hugging Face model page.
|