smohammadi committed on
Commit: ab2eea2
Parent: 8d96ff1

Upload config.yaml with huggingface_hub

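The commit message above refers to the standard Hugging Face Hub upload path. As a minimal sketch (not taken from this commit), the same upload can be scripted with huggingface_hub's HfApi.upload_file; the repo id below is a placeholder:

# Minimal sketch, assuming the file sits next to the script and the token
# comes from `huggingface-cli login`. The repo_id is a placeholder.
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="config.yaml",   # local file to push
    path_in_repo="config.yaml",      # destination path inside the repo
    repo_id="<user>/<repo>",         # placeholder, not from this commit
    repo_type="model",
    commit_message="Upload config.yaml with huggingface_hub",
)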
Files changed (1)
  1. config.yaml +66 -0
config.yaml ADDED
@@ -0,0 +1,66 @@
+tokenizer:
+  _component_: torchtune.models.gemma.gemma_tokenizer
+  path: ./target/gemma-2b/tokenizer.model
+dataset:
+  _component_: torchtune.datasets.alpaca_dataset
+seed: null
+shuffle: true
+model:
+  _component_: torchtune.models.gemma.lora_gemma_2b
+  lora_attn_modules:
+  - q_proj
+  - k_proj
+  - v_proj
+  apply_lora_to_mlp: true
+  lora_rank: 64
+  lora_alpha: 128
+  lora_dropout: 0.0
+checkpointer:
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: ./target/gemma-2b/
+  checkpoint_files:
+  - model-00001-of-00002.safetensors
+  - model-00002-of-00002.safetensors
+  recipe_checkpoint: null
+  output_dir: ${output_dir}/weights
+  model_type: GEMMA
+resume_from_checkpoint: false
+save_adapter_weights_only: false
+optimizer:
+  _component_: torch.optim.AdamW
+  fused: false
+  lr: 2.0e-05
+lr_scheduler:
+  _component_: torchtune.modules.get_cosine_schedule_with_warmup
+  num_warmup_steps: 10
+loss:
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+batch_size: 4
+epochs: 1
+max_steps_per_epoch: 10
+gradient_accumulation_steps: 4
+compile: false
+device: mps
+enable_activation_checkpointing: true
+enable_activation_offloading: false
+dtype: bf16
+metric_logger:
+  _component_: torchtune.training.metric_logging.DiskLogger
+  log_dir: ${output_dir}
+output_dir: ./target/gemma_tmp
+log_every_n_steps: 1
+log_peak_memory_stats: false
+profiler:
+  _component_: torchtune.training.setup_torch_profiler
+  enabled: false
+  output_dir: ${output_dir}/profiling_outputs
+  cpu: true
+  cuda: true
+  profile_memory: false
+  with_stack: false
+  record_shapes: true
+  with_flops: false
+  wait_steps: 5
+  warmup_steps: 5
+  active_steps: 2
+  num_cycles: 1
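The file is a torchtune LoRA fine-tuning config for Gemma 2B: rank-64 adapters (alpha 128) on the q/k/v attention projections and the MLP, trained on the Alpaca dataset in bf16 on an mps device for a single short epoch (10 steps, batch size 4 with 4 gradient-accumulation steps). A config of this shape is normally passed to a torchtune recipe from the command line; the recipe name below is an assumption based on the single-device settings, not something the commit states:

    tune run lora_finetune_single_device --config ./config.yaml

For illustration, the model section maps onto torchtune's LoRA builder for Gemma 2B. A rough Python sketch, assuming the builder's keyword arguments mirror the config keys:

# Hypothetical sketch: construct the same LoRA-wrapped Gemma 2B module directly.
# Keyword names mirror the config's model section; treat them as assumptions.
from torchtune.models.gemma import lora_gemma_2b

model = lora_gemma_2b(
    lora_attn_modules=["q_proj", "k_proj", "v_proj"],  # adapters on attention projections
    apply_lora_to_mlp=True,                            # adapters on the MLP layers as well
    lora_rank=64,
    lora_alpha=128,
    lora_dropout=0.0,
)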