Update README.md
Browse files
README.md
CHANGED
@@ -27,7 +27,40 @@ additional external data resources, including back-translated news articles, FLO
|
|
27 |
The base model was [facebook/nllb-200-1.3B](https://huggingface.co/facebook/nllb-200-1.3B),
|
28 |
with tokens adapted to add support for languages not originally included.
|
29 |
|
30 |
-
# Usage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
# Evaluation metrics
|
33 |
|
|
|
27 |
The base model was [facebook/nllb-200-1.3B](https://huggingface.co/facebook/nllb-200-1.3B),
|
28 |
with tokens adapted to add support for languages not originally included.
|
29 |
|
30 |
+
# Usage example
|
31 |
+
|
32 |
+
```python
|
33 |
+
tokenizer = transformers.NllbTokenizer.from_pretrained(
|
34 |
+
'Sunbird/translate-nllb-1.3b-salt')
|
35 |
+
model = transformers.M2M100ForConditionalGeneration.from_pretrained(
|
36 |
+
'Sunbird/translate-nllb-1.3b-salt')
|
37 |
+
|
38 |
+
text = "Where is the hospital?"
|
39 |
+
source_language = 'eng'
|
40 |
+
target_language = 'lug'
|
41 |
+
|
42 |
+
language_tokens = {
|
43 |
+
'eng': 256047,
|
44 |
+
'ach': 256111,
|
45 |
+
'lgg': 256008,
|
46 |
+
'lug': 256110,
|
47 |
+
'nyn': 256002,
|
48 |
+
'teo': 256006,
|
49 |
+
}
|
50 |
+
|
51 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
52 |
+
inputs = tokenizer(text, return_tensors="pt").to(device)
|
53 |
+
inputs['input_ids'][0][0] = language_tokens[source_language]
|
54 |
+
translated_tokens = model.to(device).generate(
|
55 |
+
**inputs,
|
56 |
+
forced_bos_token_id=language_tokens[target_language],
|
57 |
+
max_length=100,
|
58 |
+
num_beams=5,
|
59 |
+
)
|
60 |
+
|
61 |
+
result = tokenizer.batch_decode(
|
62 |
+
translated_tokens, skip_special_tokens=True)[0]
|
63 |
+
```
|
64 |
|
65 |
# Evaluation metrics
|
66 |
|