mtasic85 committed on
Commit
bb8cdc8
1 Parent(s): 7911b8f
Files changed (1) hide show
  1. scripts/model.yaml +4 -2
scripts/model.yaml CHANGED
@@ -56,7 +56,8 @@ train:
56
  global_batch_size: 512
57
 
58
  # Number of samples per data-parallel rank (type: int, default: 4)
59
- micro_batch_size: 16 # 4
 
60
 
61
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
62
  lr_warmup_steps: 2000
@@ -102,7 +103,8 @@ eval:
102
 
103
  # Optimizer-related arguments
104
  optimizer:
105
- class_path: torch.optim.AdamW
 
106
  # class_path: bitsandbytes.optim.PagedAdamW
107
  # class_path: bitsandbytes.optim.AdamW8bit
108
  # class_path: bitsandbytes.optim.PagedAdamW8bit
 
56
  global_batch_size: 512
57
 
58
  # Number of samples per data-parallel rank (type: int, default: 4)
59
+ # micro_batch_size: 16
60
+ micro_batch_size: 32
61
 
62
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
63
  lr_warmup_steps: 2000
 
103
 
104
  # Optimizer-related arguments
105
  optimizer:
106
+ # class_path: torch.optim.AdamW
107
+ class_path: torch.optim.PagedAdamW
108
  # class_path: bitsandbytes.optim.PagedAdamW
109
  # class_path: bitsandbytes.optim.AdamW8bit
110
  # class_path: bitsandbytes.optim.PagedAdamW8bit