bghira/sd3-cheechandchong-regularised

@@ -143,13 +143,13 @@ You may reuse the base model text encoder for inference.
 - Training steps: 100
 - Learning rate: 1e-05
 - Max grad norm: 0.01
-- Effective batch size: 12
-  - Micro-batch size: 4
   - Gradient accumulation steps: 1
   - Number of GPUs: 3
 - Prediction type: flow-matching
 - Rescaled betas zero SNR: False
-- Optimizer: optimi-lion
 - Precision: Pure BF16
 - Quantised: Yes: int8-quanto
 - Xformers: Not used
@@ -165,14 +165,11 @@ You may reuse the base model text encoder for inference.
     "factor": 12,
     "apply_preset": {
         "target_module": [
-            "JointTransformerBlock"
         ],
         "module_algo_map": {
-            "FeedForward": {
                 "factor": 6
-            },
-            "JointTransformerBlock": {
-                "factor": 12
             }
         }
     }

 - Training steps: 100
 - Learning rate: 1e-05
 - Max grad norm: 0.01
+- Effective batch size: 3
+  - Micro-batch size: 1
   - Gradient accumulation steps: 1
   - Number of GPUs: 3
 - Prediction type: flow-matching
 - Rescaled betas zero SNR: False
+- Optimizer: optimi-stableadamw
 - Precision: Pure BF16
 - Quantised: Yes: int8-quanto
 - Xformers: Not used
     "factor": 12,
     "apply_preset": {
         "target_module": [
+            "Attention"
         ],
         "module_algo_map": {
+            "Attention": {
                 "factor": 6
             }
         }
     }