qninhdt committed on
Commit
95aa666
1 Parent(s): 0846051
train_autoencoder.sh → .nfs00000001516db020000048ee RENAMED
File without changes
configs/data/swim.yaml CHANGED
@@ -1,4 +1,4 @@
1
  _target_: swim.data.swim_data.SwimDataModule
2
  root_dir: /home/qninh/projects/swim_/datasets/swim_data
3
- batch_size: 4
4
  img_size: 64
 
1
  _target_: swim.data.swim_data.SwimDataModule
2
  root_dir: /home/qninh/projects/swim_/datasets/swim_data
3
+ batch_size: 8
4
  img_size: 64
configs/train.yaml CHANGED
@@ -45,5 +45,7 @@ test: True
45
  # simply provide checkpoint path to resume training
46
  ckpt_path: null
47
 
 
 
48
  # seed for random number generators in pytorch, numpy and python.random
49
  seed: 42
 
45
  # simply provide checkpoint path to resume training
46
  ckpt_path: null
47
 
48
+ compile: false
49
+
50
  # seed for random number generators in pytorch, numpy and python.random
51
  seed: 42
swim/models/autoencoder.py CHANGED
@@ -165,9 +165,12 @@ class Autoencoder(LightningModule):
165
  if batch_idx == 0:
166
  self.log_images(img, recon)
167
 
168
- # def setup(self, stage: str) -> None:
169
- # if self.hparams.compile and stage == "fit":
170
- # self.net = torch.compile(self.net)
 
 
 
171
 
172
  def log_images(self, ori_images, recon_images):
173
  """
 
165
  if batch_idx == 0:
166
  self.log_images(img, recon)
167
 
168
def compile(self):
    """Swap the heavy submodules for torch.compile-optimized versions.

    Rebinds ``encoder``, ``decoder``, ``quant_conv``, ``post_quant_conv``
    and ``lpips`` to wrapped (``OptimizedModule``) versions of themselves.
    Compilation is lazy: the actual kernel tuning happens on first forward.

    BUGFIX: ``torch.compile``'s parameters after ``model`` are keyword-only,
    so the original positional ``torch.compile(x, "max-autotune")`` raised
    ``TypeError``; the mode must be passed as ``mode="max-autotune"``.

    NOTE(review): this overrides ``nn.Module.compile`` (torch >= 2.1), which
    compiles in place — here attributes are rebound instead; confirm that is
    intended for checkpointing/state_dict key names.
    """
    # "max-autotune" trades longer compile time for the fastest kernels.
    self.encoder = torch.compile(self.encoder, mode="max-autotune")
    self.decoder = torch.compile(self.decoder, mode="max-autotune")
    self.quant_conv = torch.compile(self.quant_conv, mode="max-autotune")
    self.post_quant_conv = torch.compile(self.post_quant_conv, mode="max-autotune")
    self.lpips = torch.compile(self.lpips, mode="max-autotune")
174
 
175
  def log_images(self, ori_images, recon_images):
176
  """
swim/train.py CHANGED
@@ -90,6 +90,9 @@ def train(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]:
90
  # compute model learning rate
91
  model.learning_rate = cfg.data.batch_size * cfg.model.base_learning_rate
92
 
 
 
 
93
  log.info("Starting training!")
94
  trainer.fit(model=model, datamodule=datamodule, ckpt_path=cfg.get("ckpt_path"))
95
 
 
90
  # compute model learning rate
91
  model.learning_rate = cfg.data.batch_size * cfg.model.base_learning_rate
92
 
93
# Optionally wrap the model's submodules with torch.compile before fitting;
# controlled by the top-level `compile` flag added in configs/train.yaml
# (defaults to false, overridable from the CLI, e.g. `compile=true`).
if cfg.compile:
    model.compile()
96
  log.info("Starting training!")
97
  trainer.fit(model=model, datamodule=datamodule, ckpt_path=cfg.get("ckpt_path"))
98
 
train_autoencoder_64.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ python swim/train.py \
2
+ data.root_dir=/cm/shared/ninhnq3/datasets/swim_data \
3
+ data.img_size=512 \
4
+ data.batch_size=16 \
5
+ compile=true \
6
+ callbacks.model_checkpoint.dirpath=/cm/shared/ninhnq3/checkpoints/swim/autoencoder/simple