mtasic85 committed on
Commit
4000a4d
1 Parent(s): bb8cdc8
Files changed (1) hide show
  1. scripts/model.yaml +3 -4
scripts/model.yaml CHANGED
@@ -56,8 +56,8 @@ train:
56
  global_batch_size: 512
57
 
58
  # Number of samples per data-parallel rank (type: int, default: 4)
59
- # micro_batch_size: 16
60
- micro_batch_size: 32
61
 
62
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
63
  lr_warmup_steps: 2000
@@ -103,8 +103,7 @@ eval:
103
 
104
  # Optimizer-related arguments
105
  optimizer:
106
- # class_path: torch.optim.AdamW
107
- class_path: torch.optim.PagedAdamW
108
  # class_path: bitsandbytes.optim.PagedAdamW
109
  # class_path: bitsandbytes.optim.AdamW8bit
110
  # class_path: bitsandbytes.optim.PagedAdamW8bit
 
56
  global_batch_size: 512
57
 
58
  # Number of samples per data-parallel rank (type: int, default: 4)
59
+ micro_batch_size: 16
60
+ # micro_batch_size: 15
61
 
62
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
63
  lr_warmup_steps: 2000
 
103
 
104
  # Optimizer-related arguments
105
  optimizer:
106
+ class_path: torch.optim.AdamW
 
107
  # class_path: bitsandbytes.optim.PagedAdamW
108
  # class_path: bitsandbytes.optim.AdamW8bit
109
  # class_path: bitsandbytes.optim.PagedAdamW8bit