# Supervised fine-tuning (stage: sft) of a LoRA adapter on top of
# mistralai/Mistral-7B-Instruct-v0.3, starting from an existing ORPO adapter.
# NOTE(review): the key names look like a LLaMA-Factory training config whose
# trainer options are forwarded to Hugging Face TrainingArguments — confirm.

### model
model_name_or_path: mistralai/Mistral-7B-Instruct-v0.3
adapter_name_or_path: chchen/Mistral-7B-Instruct-v0.3-ORPO

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: bct_non_cot_sft_1000
dataset_dir: data_private
template: mistral
cutoff_len: 1024
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/Mistral-7B-Instruct-v0.3/lora/orpo-sft
logging_steps: 10
# save_steps is kept equal to eval_steps (see the eval section) so every
# checkpoint has a matching evaluation for load_best_model_at_end.
# NOTE(review): if the dataset name means ~1000 examples, the whole run has far
# fewer than 500 optimizer steps, so no intermediate checkpoint or evaluation
# would ever happen — confirm, and lower save_steps and eval_steps together.
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_total_limit: 3
load_best_model_at_end: true
push_to_hub: true
hub_model_id: chchen/Mistral-7B-Instruct-v0.3-ORPO-SFT

### train
# 2 sequences x 8 accumulation steps = 16 sequences per device per optimizer step.
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 0.000005
num_train_epochs: 3.0
lr_scheduler_type: cosine
# Fraction of the total training steps spent on warmup. This was written as
# `warmup_steps: 0.1`, but warmup_steps is an absolute step count, so 0.1
# resulted in effectively no warmup instead of the intended 10%.
warmup_ratio: 0.1
fp16: true

### eval
val_size: 0.1
per_device_eval_batch_size: 2
# NOTE(review): newer transformers releases rename this key to `eval_strategy`;
# keep the spelling that matches the pinned transformers version.
evaluation_strategy: steps
eval_steps: 500