model
Browse files - scripts/model.yaml +3 -3
scripts/model.yaml
CHANGED
@@ -27,7 +27,8 @@ model_config:
|
|
27 |
out_dir: out/pretrain/
|
28 |
|
29 |
# The precision to use for pretraining. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
|
30 |
-
precision: bf16-mixed
|
|
|
31 |
|
32 |
# Optional path to a checkpoint directory to initialize the model from.
|
33 |
# Useful for continued pretraining. Mutually exclusive with ``resume``. (type: Optional[Path], default: null)
|
@@ -100,8 +101,7 @@ eval:
|
|
100 |
# Optimizer-related arguments
|
101 |
optimizer:
|
102 |
# class_path: torch.optim.AdamW
|
103 |
-
class_path:
|
104 |
-
# class_path: grokadamw.GrokAdamW
|
105 |
|
106 |
init_args:
|
107 |
# (type: float, default: 0.001)
|
|
|
27 |
out_dir: out/pretrain/
|
28 |
|
29 |
# The precision to use for pretraining. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
|
30 |
+
# precision: bf16-mixed
|
31 |
+
precision: bf16-true
|
32 |
|
33 |
# Optional path to a checkpoint directory to initialize the model from.
|
34 |
# Useful for continued pretraining. Mutually exclusive with ``resume``. (type: Optional[Path], default: null)
|
|
|
101 |
# Optimizer-related arguments
|
102 |
optimizer:
|
103 |
# class_path: torch.optim.AdamW
|
104 |
+
class_path: PagedAdamW
|
|
|
105 |
|
106 |
init_args:
|
107 |
# (type: float, default: 0.001)
|