Spaces:
Runtime error
Runtime error
daresearch
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -109,7 +109,7 @@ model = FastLanguageModel.get_peft_model(
|
|
109 |
target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
|
110 |
"gate_proj", "up_proj", "down_proj"],
|
111 |
lora_alpha=16,
|
112 |
-
lora_dropout=0, # Supports any, but = 0 is optimized
|
113 |
bias="none", # Supports any, but = "none" is optimized
|
114 |
use_gradient_checkpointing="unsloth", # True or "unsloth" for very long context
|
115 |
random_state=3407,
|
@@ -124,18 +124,18 @@ trainer = SFTTrainer(
|
|
124 |
eval_dataset=valid_dataset,
|
125 |
dataset_text_field="text",
|
126 |
max_seq_length=max_seq_length,
|
127 |
-
dataset_num_proc=
|
128 |
packing=True, # Enable sequence packing
|
129 |
args=TrainingArguments(
|
130 |
-
per_device_train_batch_size=
|
131 |
-
gradient_accumulation_steps=
|
132 |
warmup_steps=5,
|
133 |
-
max_steps
|
134 |
-
|
135 |
learning_rate=2e-4,
|
136 |
-
fp16=
|
137 |
-
bf16=
|
138 |
-
logging_steps=
|
139 |
evaluation_strategy="steps",
|
140 |
eval_steps=50, # Evaluate less frequently
|
141 |
max_grad_norm=1.0, # Add gradient clipping
|
|
|
109 |
target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
|
110 |
"gate_proj", "up_proj", "down_proj"],
|
111 |
lora_alpha=16,
|
112 |
+
lora_dropout=0.05, # Supports any, but = 0 is optimized
|
113 |
bias="none", # Supports any, but = "none" is optimized
|
114 |
use_gradient_checkpointing="unsloth", # True or "unsloth" for very long context
|
115 |
random_state=3407,
|
|
|
124 |
eval_dataset=valid_dataset,
|
125 |
dataset_text_field="text",
|
126 |
max_seq_length=max_seq_length,
|
127 |
+
dataset_num_proc=8, # Increase parallelism
|
128 |
packing=True, # Enable sequence packing
|
129 |
args=TrainingArguments(
|
130 |
+
per_device_train_batch_size=32, # Lower batch size to prevent memory issues
|
131 |
+
gradient_accumulation_steps=1, # Maintain effective batch size
|
132 |
warmup_steps=5,
|
133 |
+
max_steps=-1, # -1 disables the step cap; num_train_epochs controls training length
|
134 |
+
num_train_epochs=3, # Test with fewer epochs
|
135 |
learning_rate=2e-4,
|
136 |
+
fp16=not is_bfloat16_supported(),
|
137 |
+
bf16=is_bfloat16_supported(),
|
138 |
+
logging_steps=10, # Log less frequently
|
139 |
evaluation_strategy="steps",
|
140 |
eval_steps=50, # Evaluate less frequently
|
141 |
max_grad_norm=1.0, # Add gradient clipping
|