daresearch committed
Commit 7948b59 · verified · 1 parent: a35a06c

Update app.py

Files changed (1): app.py (+14, -13)
app.py CHANGED
@@ -120,24 +120,25 @@ model = FastLanguageModel.get_peft_model(
 trainer = SFTTrainer(
     model=model,
     tokenizer=tokenizer,
-    train_dataset=train_dataset,  # Updated to use train_dataset
-    eval_dataset=valid_dataset,  # Added eval_dataset for validation
+    train_dataset=train_dataset,
+    eval_dataset=valid_dataset,
     dataset_text_field="text",
     max_seq_length=max_seq_length,
-    dataset_num_proc=2,
-    packing=False,  # Can make training 5x faster for short sequences.
+    dataset_num_proc=4,  # Increase parallelism
+    packing=True,  # Enable sequence packing
     args=TrainingArguments(
-        per_device_train_batch_size=32,
-        gradient_accumulation_steps=2,
+        per_device_train_batch_size=4,  # Lower batch size to prevent memory issues
+        gradient_accumulation_steps=4,  # Maintain effective batch size
         warmup_steps=5,
-        max_steps=-1,
-        num_train_epochs=3,
+        max_steps=702,  # Train in smaller chunks
+        # num_train_epochs=1,  # Test with fewer epochs
         learning_rate=2e-4,
-        fp16=not is_bfloat16_supported(),
-        bf16=is_bfloat16_supported(),
-        logging_steps=1,
-        evaluation_strategy="steps",  # Enables evaluation during training
-        eval_steps=10,  # Frequency of evaluation
+        fp16=False,  # Disable mixed precision temporarily
+        bf16=False,
+        logging_steps=25,  # Log less frequently
+        evaluation_strategy="steps",
+        eval_steps=50,  # Evaluate less frequently
+        max_grad_norm=1.0,  # Add gradient clipping
         optim="adamw_8bit",
         weight_decay=0.01,
         lr_scheduler_type="linear",
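
As a rough sanity check on these settings, the effective batch size and the number of examples consumed per run work out as below (a minimal sketch assuming a single GPU; the variable names are illustrative only and not part of app.py):

# Hypothetical sanity check, not part of the committed app.py.
old_effective_batch = 32 * 2   # per_device_train_batch_size * gradient_accumulation_steps = 64
new_effective_batch = 4 * 4    # 4 * 4 = 16 per optimizer step on one device
examples_per_run = 702 * new_effective_batch  # max_steps * effective batch = 11,232 examples
print(old_effective_batch, new_effective_batch, examples_per_run)

Lowering the per-device batch size while raising gradient_accumulation_steps trades peak GPU memory for more accumulation passes per optimizer step; on multiple devices these figures scale with the device count.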