Spaces: xichen98cn
Commit e7ae87a · 1 Parent(s): 5e46b82
Update frozenseg/frozenseg.py

frozenseg/frozenseg.py CHANGED (+59 -22)
@@ -16,6 +16,7 @@ from segment_anything.build_sam import sam_model_registry
 from .modeling.transformer_decoder.frozenseg_transformer_decoder import MaskPooling, get_classification_logits
 from segment_anything import sam_model_registry
 import pickle
+import os
 VILD_PROMPT = [
     "a photo of a {}.",
     "This is a photo of a {}",
@@ -33,6 +34,20 @@ VILD_PROMPT = [
     "There is a large {} in the scene.",
 ]
 
+def split_labels(x):
+    res = []
+    for x_ in x:
+        x_ = x_.replace(', ', ',')
+        x_ = x_.split(',') # there can be multiple synonyms for single class
+        res.append(x_)
+    return res
+
+def fill_all_templates_ensemble(x_=''):
+    res = []
+    for x in x_:
+        for template in VILD_PROMPT:
+            res.append(template.format(x))
+    return res, len(res) // len(VILD_PROMPT)
+
 
 @META_ARCH_REGISTRY.register()
 class FrozenSeg(nn.Module):
@@ -132,6 +147,14 @@ class FrozenSeg(nn.Module):
 
         _, self.train_num_templates, self.train_class_names = self.prepare_class_names_from_metadata(train_metadata, train_metadata)
         self.category_overlapping_mask, self.test_num_templates, self.test_class_names = self.prepare_class_names_from_metadata(test_metadata, train_metadata)
+
+        self.demo_all_text_embedding_cache = {}
+        # This consists of COCO, ADE20K, LVIS
+        if os.path.exists("demo_all_text_embedding_cache.pth"):
+            # key: str of class name, value: tensor in shape of C
+            self.demo_all_text_embedding_cache = torch.load("demo_all_text_embedding_cache.pth", map_location=self.device)
+            self.demo_all_text_embedding_cache = {k:v.to(self.device) for k,v in self.demo_all_text_embedding_cache.items()}
+
         # sam args
         sam_ckpt_path = {
             'vit_t': './pretrained_checkpoint/mobile_sam.pt',
@@ -165,13 +188,7 @@ class FrozenSeg(nn.Module):
 
 
     def prepare_class_names_from_metadata(self, metadata, train_metadata):
-        def split_labels(x):
-            res = []
-            for x_ in x:
-                x_ = x_.replace(', ', ',')
-                x_ = x_.split(',') # there can be multiple synonyms for single class
-                res.append(x_)
-            return res
+
         # get text classifier
         try:
             class_names = split_labels(metadata.stuff_classes) # it includes both thing and stuff
@@ -188,13 +205,6 @@ class FrozenSeg(nn.Module):
         category_overlapping_mask = torch.tensor(
             category_overlapping_list, dtype=torch.long)
 
-        def fill_all_templates_ensemble(x_=''):
-            res = []
-            for x in x_:
-                for template in VILD_PROMPT:
-                    res.append(template.format(x))
-            return res, len(res) // len(VILD_PROMPT)
-
         num_templates = []
         templated_class_names = []
         for x in class_names:
@@ -228,17 +238,44 @@ class FrozenSeg(nn.Module):
             return self.train_text_classifier, self.train_num_templates
         else:
             if self.test_text_classifier is None:
+                try:
+                    nontemplated_class_names = split_labels(self.test_metadata.stuff_classes) # it includes both thing and stuff
+                except:
+                    # this could be for insseg, where only thing_classes are available
+                    nontemplated_class_names = split_labels(self.test_metadata.thing_classes)
+
+                text2classifier = {}
+                test_class_names = []
+                uncached_class_name = []
                 text_classifier = []
+                # exclude those already in cache
+                for class_names in nontemplated_class_names:
+                    for class_name in class_names:
+                        if class_name in self.demo_all_text_embedding_cache:
+                            text2classifier[class_name] = self.demo_all_text_embedding_cache[class_name].to(self.device)
+                        else:
+                            test_class_names += fill_all_templates_ensemble([class_name])[0]
+                            uncached_class_name.append(class_name)
+                print("Uncached texts:", len(uncached_class_name), uncached_class_name, test_class_names)
                 # this is needed to avoid oom, which may happen when num of class is large
                 bs = 128
-                for idx in range(0, len(self.test_class_names), bs):
-                    text_classifier.append(self.backbone.get_text_classifier(self.test_class_names[idx:idx+bs], self.device).detach())
-                text_classifier = torch.cat(text_classifier, dim=0)
-
-                # average across templates and normalization.
-                text_classifier /= text_classifier.norm(dim=-1, keepdim=True)
-                text_classifier = text_classifier.reshape(text_classifier.shape[0]//len(VILD_PROMPT), len(VILD_PROMPT), text_classifier.shape[-1]).mean(1)
-                text_classifier /= text_classifier.norm(dim=-1, keepdim=True)
+                for idx in range(0, len(test_class_names), bs):
+                    text_classifier.append(self.backbone.get_text_classifier(test_class_names[idx:idx+bs], self.device).detach())
+
+                if len(text_classifier) > 0:
+                    text_classifier = torch.cat(text_classifier, dim=0)
+                    text_classifier /= text_classifier.norm(dim=-1, keepdim=True)
+                    text_classifier = text_classifier.reshape(text_classifier.shape[0]//len(VILD_PROMPT), len(VILD_PROMPT), text_classifier.shape[-1]).mean(1)
+                    text_classifier /= text_classifier.norm(dim=-1, keepdim=True)
+                    assert text_classifier.shape[0] == len(uncached_class_name)
+                    for idx in range(len(uncached_class_name)):
+                        self.demo_all_text_embedding_cache[uncached_class_name[idx]] = text_classifier[idx]
+                        text2classifier[uncached_class_name[idx]] = text_classifier[idx]
+                text_classifier = []
+                for class_names in nontemplated_class_names:
+                    for text in class_names:
+                        text_classifier.append(text2classifier[text].to(self.device))
+                text_classifier = torch.stack(text_classifier, dim=0).to(self.device)
                 self.test_text_classifier = text_classifier
             return self.test_text_classifier, self.test_num_templates
 
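A note on the two helpers promoted to module scope: they are pure functions of the metadata class-name strings, so their behavior can be checked standalone. A minimal sketch (assuming only the definitions added in the diff above; the example labels are made up):

# Sketch: behavior of the module-level helpers from the diff above,
# assuming VILD_PROMPT, split_labels and fill_all_templates_ensemble
# are in scope. The label strings are illustrative only.

labels = ["person", "sofa, couch"]   # metadata-style class names

# split_labels splits comma-separated synonyms into per-class lists:
# [["person"], ["sofa", "couch"]]
per_class_synonyms = split_labels(labels)

# fill_all_templates_ensemble expands one name into len(VILD_PROMPT)
# templated sentences, e.g. "a photo of a sofa."
sentences, num_classes = fill_all_templates_ensemble(["sofa"])
assert len(sentences) == len(VILD_PROMPT) and num_classes == 1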
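The norm/reshape/mean sequence in the new branch is prompt-ensemble averaging: get_text_classifier returns one embedding per templated sentence, and the mean over the len(VILD_PROMPT) templates yields one unit-norm classifier vector per class name. A self-contained sketch of just that math, with random tensors standing in for the CLIP text outputs and assumed sizes:

import torch

T, C = 14, 512         # e.g. number of VILD templates, embed dim (assumed)
num_names = 3          # class names being embedded

# Stand-in for the backbone output: one row per templated sentence.
emb = torch.randn(num_names * T, C)

emb = emb / emb.norm(dim=-1, keepdim=True)   # normalize per sentence
emb = emb.reshape(num_names, T, C).mean(1)   # average the template ensemble
emb = emb / emb.norm(dim=-1, keepdim=True)   # renormalize the averaged vector

assert emb.shape == (num_names, C)           # one classifier vector per name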
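Nothing in this commit writes demo_all_text_embedding_cache.pth back to disk; the file is only loaded when it already exists. A hypothetical offline step to produce it might look like the sketch below (save_text_embedding_cache is not part of the repo; it assumes the model's cache dict has already been populated by running get_text_classifier over the COCO/ADE20K/LVIS vocabularies):

import torch

# Hypothetical offline step: after running inference once over the target
# vocabularies, the per-class embeddings live in
# model.demo_all_text_embedding_cache (str -> tensor of shape C).
def save_text_embedding_cache(model, path="demo_all_text_embedding_cache.pth"):
    # Move to CPU so the file loads on any device via map_location.
    cache = {k: v.detach().cpu() for k, v in model.demo_all_text_embedding_cache.items()}
    torch.save(cache, path)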