exp_name: 'vi-en-fix-v1'
# Training dataset (from Hugging Face)
# data_source: "MedCat/MedCAT-SFT-v1"
data_source: "MedCat/MedCAT-SFT-v1.1"
# The base model (from the Hugging Face model hub)
# model_name: "Qwen/Qwen2.5-0.5B"
model_name: "MedCat/MedCAT-PT-Qwen2.5-0.5B-v1-stream-data-v1-checkpoint-600000"
# model_name: "MedCat/MedCAT-PT-Apollo-0.5B-v1-stream-data-v1-checkpoint-600000"
# Tokenizer
tokenizer_batch_size: 1_000
max_length: 512
# Checkpoints configuration
output_folder: "./checkpoints/MedCAT-SFT" # Where to save checkpoints during training
save_total_limit: 2 # Limit on the number of checkpoints to keep
save_strategy: "steps" # Saving strategy (either 'steps' or 'epoch')
save_steps: 500 # Save the model every ... steps
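# With save_strategy "steps", the Hugging Face Trainer (assuming it consumes this
# config) writes a checkpoint every save_steps optimizer steps; because
# save_total_limit is 2, older checkpoints are deleted so only the two most
# recent remain on disk.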
# LoRA
r: 8 # Rank of the low-rank matrices
lora_alpha: 32 # LoRA alpha (scaling factor)
lora_dropout: 0.1 # Dropout rate
bias: "none" # Whether to train biases ("none", "all", or "lora_only")
task_type: "CAUSAL_LM" # Task type: causal language modeling
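# Assuming the training script builds the adapter with the PEFT library, these
# five keys map one-to-one onto peft.LoraConfig, e.g.:
#   LoraConfig(r=8, lora_alpha=32, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM")
# The adapter output is scaled by lora_alpha / r = 32 / 8 = 4.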
# Logging configuration
logging_dir: "./logs/MedCAT-SFT" # Directory for logs (combined with base_model and data_version)
logging_steps: 100 # Frequency of logging
# Training configuration
per_device_train_batch_size: 4 # Training batch size
per_device_eval_batch_size: 4 # Evaluation batch size
num_train_epochs: 2 # Number of epochs
# max_steps: 500 # Total training steps (alternative to num_train_epochs)
eval_steps: 500 # Frequency of evaluation (typically kept in sync with logging_steps)
evaluation_strategy: "steps" # Evaluation strategy (either 'steps' or 'epoch')
seed: 3407 # Random seed for reproducibility
gradient_accumulation_steps: 8 # Steps to accumulate gradients before each optimizer update
learning_rate: 0.00001 # Initial learning rate (1e-5)
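# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
#                      = 4 * 8 = 32 examples per optimizer update on each device
# (multiply by the number of devices when training data-parallel).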