# Tokenizer
tokenizer:
  _component_: torchtune.models.gemma.gemma_tokenizer
  path: ./target/gemma-2b/tokenizer.model

# Dataset
dataset:
  _component_: torchtune.datasets.alpaca_dataset
seed: null
shuffle: true

# Model arguments
model:
  _component_: torchtune.models.gemma.lora_gemma_2b
  lora_attn_modules:
    - q_proj
    - k_proj
    - v_proj
  apply_lora_to_mlp: true
  lora_rank: 64
  lora_alpha: 128
  lora_dropout: 0.0

# Checkpointing
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ./target/gemma-2b/
  checkpoint_files:
    - model-00001-of-00002.safetensors
    - model-00002-of-00002.safetensors
  recipe_checkpoint: null
  output_dir: ${output_dir}/weights
  model_type: GEMMA
resume_from_checkpoint: false
save_adapter_weights_only: false

# Optimizer, scheduler, and loss
optimizer:
  _component_: torch.optim.AdamW
  fused: false
  lr: 2.0e-05
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  num_warmup_steps: 10
loss:
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

# Fine-tuning arguments
batch_size: 4
epochs: 1
max_steps_per_epoch: 10
gradient_accumulation_steps: 4
compile: false

# Training environment
device: mps
enable_activation_checkpointing: true
enable_activation_offloading: false
dtype: bf16

# Logging
metric_logger:
  _component_: torchtune.training.metric_logging.DiskLogger
  log_dir: ${output_dir}
output_dir: ./target/gemma_tmp
log_every_n_steps: 1
log_peak_memory_stats: false

# Profiling (disabled by default)
profiler:
  _component_: torchtune.training.setup_torch_profiler
  enabled: false
  output_dir: ${output_dir}/profiling_outputs
  cpu: true
  cuda: true
  profile_memory: false
  with_stack: false
  record_shapes: true
  with_flops: false
  wait_steps: 5
  warmup_steps: 5
  active_steps: 2
  num_cycles: 1
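
# Usage (a sketch, assuming torchtune's CLI is installed and this file is saved
# locally; the filename gemma_2b_lora_mps.yaml below is hypothetical):
#
#   tune run lora_finetune_single_device --config gemma_2b_lora_mps.yaml
#
# torchtune also accepts key=value overrides on the command line, e.g. to halve
# the batch size and lower the learning rate without editing this file:
#
#   tune run lora_finetune_single_device --config gemma_2b_lora_mps.yaml \
#     batch_size=2 optimizer.lr=1e-5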