---
# Axolotl config: QLoRA fine-tune of Phi-3-mini (dolphin-2.9 data mix),
# with layer replication to expand the 32-layer base into a deeper model.
base_model: unsloth/Phi-3-mini-4k-instruct
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
trust_remote_code: true

# Overlapping [start, end) layer ranges duplicated to build the expanded stack.
peft_layer_replication:
  - [0, 8]
  - [4, 12]
  - [8, 16]
  - [12, 20]
  - [16, 24]
  - [20, 28]
  - [24, 32]

load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
  - path: /workspace/datasets/dolphin-2.9/dolphin201-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  # - path: /workspace/datasets/dolphin-2.9/Ultrachat200kunfiltered.jsonl
  #   type: sharegpt
  #   conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/dolphin-coder-translate-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/dolphin-coder-codegen-sharegpt2.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/m-a-p_Code-Feedback-sharegpt-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/m-a-p_CodeFeedback-Filtered-Instruction-sharegpt-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/not_samantha_norefusals.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/Orca-Math-resort-unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/agent_instruct_react_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/toolbench_instruct_j1s1_3k_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/toolbench_negative_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/toolbench_react_10p_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/toolbench_tflan_cot_30p_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  - path: /workspace/datasets/dolphin-2.9/openhermes200k_unfiltered.jsonl
    type: sharegpt
    conversation: chatml
  # - path: /workspace/datasets/dolphin-2.9/SystemConversations.jsonl
  #   type: sharegpt
  #   conversation: chatml

chat_template: chatml

dataset_prepared_path: dolphin-phi3-prepared
val_set_size: 0
output_dir: ./dolphin-phi3-5b

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true

adapter: qlora
lora_model_dir:
lora_r: 64
lora_alpha: 32
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:
# Embeddings and output head are trained fully (new ChatML tokens below).
lora_modules_to_save: ['embed_tokens', 'lm_head']

wandb_project: dolphin-2.9-phi3-5b
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 8
num_epochs: 4
optimizer: adamw_8bit
# adam_beta2: 0.95
# adam_epsilon: 0.00001
max_grad_norm: 1.0
lr_scheduler: cosine
learning_rate: 5e-6
train_on_inputs: false
group_by_length: false

bf16: auto
fp16:
tf32: true

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 100
evals_per_epoch: 4
saves_per_epoch: 1
debug:
# deepspeed: deepspeed_configs/zero2.json
weight_decay: 0.1
fsdp:
fsdp_config:

# resize_token_embeddings_to_32x: true
special_tokens:
  eos_token: "<|im_end|>"
  pad_token: "<|endoftext|>"
tokens:
  - "<|im_start|>"
  - "<|im_end|>"