train model
Browse files — scripts/train_model.py: +6 −3

scripts/train_model.py (CHANGED)

@@ -7,7 +7,7 @@ from transformers import AutoConfig
  7      from transformers import DataCollatorForLanguageModeling
  8
  9
 10  -   x = input('Are you sure? [y/N]')
 11
 12      if x not in ('y', 'Y', 'yes'):
 13          sys.exit(0)
@@ -222,8 +222,11 @@ print(model)
222     training_args = TrainingArguments(
223         output_dir='./results',
224         num_train_epochs=3,
225 -       per_device_train_batch_size=    [old value truncated in page extraction]
226 -       per_device_eval_batch_size=     [old value truncated in page extraction]
227         warmup_steps=500,
228         weight_decay=0.01,
229         logging_dir='./logs',
|
|
7 |
from transformers import DataCollatorForLanguageModeling
|
8 |
|
9 |
|
10 |
+
x = input('Are you sure? [y/N] ')
|
11 |
|
12 |
if x not in ('y', 'Y', 'yes'):
|
13 |
sys.exit(0)
|
|
|
222 |
training_args = TrainingArguments(
|
223 |
output_dir='./results',
|
224 |
num_train_epochs=3,
|
225 |
+
per_device_train_batch_size=4, # Adjust based on your GPU memory
|
226 |
+
per_device_eval_batch_size=4,
|
227 |
+
optim='adamw_bnb_8bit',
|
228 |
+
gradient_accumulation_steps=4,
|
229 |
+
gradient_checkpointing=True,
|
230 |
warmup_steps=500,
|
231 |
weight_decay=0.01,
|
232 |
logging_dir='./logs',
|