Update README.md
Browse files
README.md
CHANGED
@@ -148,25 +148,33 @@ Hungarian, Slovak, Slovenian, Estonian, Polish, Latvian, Swedish, Maltese, Irish
|
|
148 |
To translate a single sentence from Spanish to Catalan using Hugging Face's `AutoModelForCausalLM` class, you can use the following code:
|
149 |
|
150 |
```python
|
|
|
151 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
152 |
|
153 |
model_id = 'BSC-LT/salamandraTA-2b'
|
154 |
|
|
|
155 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
156 |
model = AutoModelForCausalLM.from_pretrained(model_id)
|
157 |
|
|
|
|
|
|
|
|
|
158 |
src_lang_code = 'Spanish'
|
159 |
tgt_lang_code = 'Catalan'
|
160 |
sentence = 'Ayer se fue, tomó sus cosas y se puso a navegar.'
|
161 |
|
162 |
prompt = f'[{src_lang_code}] {sentence} \n[{tgt_lang_code}]'
|
163 |
|
164 |
-
|
165 |
-
|
|
|
166 |
input_length = input_ids.shape[1]
|
167 |
|
168 |
-
generated_text = tokenizer.decode(output_ids[0, input_length:
|
169 |
-
|
|
|
170 |
```
|
171 |
|
172 |
<br>
|
|
|
148 |
To translate a single sentence from Spanish to Catalan using Hugging Face's `AutoModelForCausalLM` class, you can use the following code:
|
149 |
|
150 |
```python
|
151 |
+
import torch
|
152 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
153 |
|
154 |
model_id = 'BSC-LT/salamandraTA-2b'
|
155 |
|
156 |
+
# Load tokenizer and model
|
157 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
158 |
model = AutoModelForCausalLM.from_pretrained(model_id)
|
159 |
|
160 |
+
# Move model to GPU if available
|
161 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
162 |
+
model.to(device)
|
163 |
+
|
164 |
src_lang_code = 'Spanish'
|
165 |
tgt_lang_code = 'Catalan'
|
166 |
sentence = 'Ayer se fue, tomó sus cosas y se puso a navegar.'
|
167 |
|
168 |
prompt = f'[{src_lang_code}] {sentence} \n[{tgt_lang_code}]'
|
169 |
|
170 |
+
# Tokenize and move inputs to the same device as the model
|
171 |
+
input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
|
172 |
+
output_ids = model.generate(input_ids, max_length=500, num_beams=5)
|
173 |
input_length = input_ids.shape[1]
|
174 |
|
175 |
+
generated_text = tokenizer.decode(output_ids[0, input_length:], skip_special_tokens=True).strip()
|
176 |
+
print(generated_text)
|
177 |
+
# Ahir se'n va anar, va agafar les seves coses i es va posar a navegar.
|
178 |
```
|
179 |
|
180 |
<br>
|