{
    "cutoff_len": 1024,
    "save_step": 2000,
    "early_stop_test_step": 2000,
    "train_lora_candidate_num": 1,
    "train_lora_simultaneously_num": 1,
    "train_strategy": "optim",
    "lora": [
        {
            "name": "alpaca-mixlora-7b",
            "optim": "adamw",
            "lr": 3e-4,
            "batch_size": 16,
            "micro_batch_size": 4,
            "test_batch_size": 64,
            "num_epochs": 2,
            "r": 8,
            "lora_alpha": 16,
            "lora_dropout": 0.05,
            "target_modules": {
                "q_proj": false,
                "k_proj": false,
                "v_proj": false,
                "o_proj": false,
                "w1_proj": true,
                "w2_proj": true,
                "w3_proj": true
            },
            "routing_strategy": "mixtral",
            "num_experts": 8,
            "top_k": 3,
            "act_fn": "silu",
            "data": "yahma/alpaca-cleaned",
            "prompt": "template/alpaca.json",
            "group_by_length": false,
            "expand_side": "right"
        }
    ]
}
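
A minimal sketch of reading this config and checking a few derived values, assuming standard Python and that the file is saved under a hypothetical path such as `config/alpaca_mixlora.json` (the path and printed fields are illustrative, not part of any specific training CLI). For instance, the batch settings above imply 16 / 4 = 4 micro-batches accumulated per optimizer step, and only the feed-forward projections (`w1_proj`, `w2_proj`, `w3_proj`) are enabled as MixLoRA targets.

```python
import json

# Hypothetical location of the config shown above; adjust as needed.
CONFIG_PATH = "config/alpaca_mixlora.json"

with open(CONFIG_PATH, "r", encoding="utf-8") as f:
    config = json.load(f)

# This file defines a single adapter entry under the "lora" list.
adapter = config["lora"][0]

# Gradient accumulation implied by the batch settings: 16 / 4 = 4.
accum_steps = adapter["batch_size"] // adapter["micro_batch_size"]

# Modules flagged true are the LoRA/MoE targets in this config.
enabled_targets = [name for name, on in adapter["target_modules"].items() if on]

print(f"adapter:  {adapter['name']}")
print(f"experts:  {adapter['num_experts']} (top_k={adapter['top_k']}, routing={adapter['routing_strategy']})")
print(f"grad accumulation steps: {accum_steps}")
print(f"enabled target modules:  {enabled_targets}")
```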