import json
import os

import click
from PIL import Image
from tqdm import tqdm


def resize_image(image: Image.Image, size: int) -> Image.Image:
    """Return a copy of `image` resized so that its shorter side is `size` pixels.

    The aspect ratio is preserved; the longer side is scaled by the same
    factor and truncated to an integer.
    """
    w, h = image.size
    if w < h:
        new_w = size
        new_h = int(size * h / w)
    else:
        new_w = int(size * w / h)
        new_h = size
    return image.resize((new_w, new_h))


SUPPORTED_WEATHERS = ["clear", "rainy", "foggy", "snowy"]
SUPPORTED_TIMESOFDAY = ["daytime", "night"]
# Maps bdd100k weather attribute values to the names used in labels.json.
WEATHER_MAPPING = {
    "clear": "clear",
    "rainy": "rain",
    "foggy": "fog",
    "snowy": "snow",
}

# Top-level condition folders of the acdc dataset.
_ACDC_CONDITIONS = ("fog", "rain", "snow", "night")
# Shorter-side length (in pixels) of every image written to the output dataset.
_TARGET_SIZE = 512
# Pixel modes Pillow's JPEG writer accepts as-is; anything else (e.g. RGBA or
# palette PNGs) must be converted before saving with a .jpg extension.
_JPEG_WRITABLE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def _resize_and_save(src_path: str, dst_path: str) -> None:
    """Load `src_path`, resize it to the target size and save it at `dst_path`."""
    # The context manager releases the source file handle even if resizing
    # fails; `resize_image` returns a new in-memory image, so it stays valid
    # after the source is closed.
    with Image.open(src_path) as image:
        if image.mode not in _JPEG_WRITABLE_MODES:
            image = image.convert("RGB")
        resized = resize_image(image, _TARGET_SIZE)
    resized.save(dst_path)


def _add_bdd100k_split(
    bdd100k_dir: str, split: str, images_dir: str, labels: list, count: int
) -> int:
    """Copy the supported images of one bdd100k split and record their labels.

    Entries whose weather or time of day is not in the supported lists are
    skipped. Label dicts are appended to `labels`; the updated running image
    counter is returned.
    """
    label_path = os.path.join(bdd100k_dir, "labels", "det_20", f"det_{split}.json")
    with open(label_path, encoding="utf-8") as f:
        entries = json.load(f)

    for entry in tqdm(entries, desc=f"bdd100k {split}"):
        weather = entry["attributes"]["weather"]
        if weather not in SUPPORTED_WEATHERS:
            continue
        timeofday = entry["attributes"]["timeofday"]
        if timeofday not in SUPPORTED_TIMESOFDAY:
            continue

        new_name = f"{count:06d}.jpg"
        count += 1
        _resize_and_save(
            os.path.join(bdd100k_dir, "images", "100k", split, entry["name"]),
            os.path.join(images_dir, new_name),
        )
        labels.append(
            {
                "name": new_name,
                "weather": WEATHER_MAPPING[weather],
                "timeofday": timeofday,
                "source": "bdd100k",
            }
        )
    return count


def _add_acdc_splits(
    acdc_dir: str, splits: list, images_dir: str, labels: list, count: int
) -> int:
    """Copy every image of the given acdc splits and record their labels.

    `*_ref` splits are labelled clear/daytime. For the other splits, the
    "night" condition is labelled clear/night and the remaining conditions
    are labelled <condition>/daytime. Label dicts are appended to `labels`;
    the updated running image counter is returned.
    """
    for condition in _ACDC_CONDITIONS:
        for split in splits:
            if split.endswith("ref"):
                weather, timeofday = "clear", "daytime"
            elif condition == "night":
                weather, timeofday = "clear", "night"
            else:
                weather, timeofday = condition, "daytime"

            split_dir = os.path.join(acdc_dir, condition, split)
            # Sorted so the source-image -> output-number assignment is
            # reproducible; os.listdir returns entries in arbitrary order.
            for subfolder in sorted(os.listdir(split_dir)):
                subfolder_dir = os.path.join(split_dir, subfolder)
                if not os.path.isdir(subfolder_dir):
                    # Stray files next to the subfolders are not image folders.
                    continue
                for filename in sorted(os.listdir(subfolder_dir)):
                    new_name = f"{count:06d}.jpg"
                    count += 1
                    _resize_and_save(
                        os.path.join(subfolder_dir, filename),
                        os.path.join(images_dir, new_name),
                    )
                    labels.append(
                        {
                            "name": new_name,
                            "weather": weather,
                            "timeofday": timeofday,
                            "source": "acdc",
                        }
                    )
    return count


def _write_labels(labels: list, path: str) -> None:
    """Write the label list to `path` as indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(labels, f, indent=4)


@click.command()
@click.option("--bdd100k_dir", type=str, default="datasets/bdd100k")
@click.option("--acdc_dir", type=str, default="datasets/acdc")
@click.option("--output_dir", type=str, default="datasets/swim_data")
def build_swim_dataset(bdd100k_dir: str, acdc_dir: str, output_dir: str):
    """Build the swim dataset from the bdd100k and acdc datasets."""
    # build the dataset with format
    # swim_data
    # ├── train
    # │   ├── images
    # │   │   ├── 000000.jpg
    # │   │   ├── ...
    # │   ├── labels.json
    # ├── val
    # │   ├── images
    # │   │   ├── ...
    # │   ├── labels.json
    #
    # labels.json
    # [
    #     {
    #         "name": "000000.jpg",
    #         "weather": "clear"|"rain"|"fog"|"snow",
    #         "timeofday": "daytime"|"night",
    #         "source": "bdd100k"|"acdc",
    #     }
    # ]
    #
    # note:
    # - all images are resized so that the shorter side is 512 pixels
    # - train: bdd100k train + acdc train + acdc val
    # - val: bdd100k val + acdc test
    # - image numbering is not reset between splits, so val numbers continue
    #   after the last train number and names are unique across both splits
    #
    # bdd100k format:
    # bdd100k
    # ├── images
    # │   ├── 100k
    # │   │   ├── train
    # │   │   │   ├── 000000.jpg
    # │   │   │   ├── ...
    # │   │   ├── val
    # │   │   │   ├── 000000.jpg
    # │   │   │   ├── ...
    # ├── labels
    # │   ├── det_20
    # │   │   ├── det_train.json
    # │   │   ├── det_val.json
    #
    # acdc format:
    # acdc
    # ├── fog
    # │   ├── test
    # │   │   ├── <subfolder>
    # │   │   │   ├── 000000.jpg
    # │   │   │   ├── ...
    # │   ├── test_ref (clear images)
    # │   │   ├── <subfolder>
    # │   │   │   ├── 000000.jpg
    # │   │   │   ├── ...
    # │   ├── train
    # │   │   ├── <subfolder>
    # │   │   │   ├── 000000.jpg
    # │   │   │   ├── ...
    # │   ├── train_ref (clear images)
    # │   │   ├── <subfolder>
    # │   │   │   ├── 000000.jpg
    # │   │   │   ├── ...
    # │   ├── val
    # │   │   ├── <subfolder>
    # │   │   │   ├── 000000.jpg
    # │   │   │   ├── ...
    # │   ├── val_ref (clear images)
    # │   │   ├── <subfolder>
    # ├── rain
    # ├── snow
    # ├── night
    train_images_dir = os.path.join(output_dir, "train", "images")
    val_images_dir = os.path.join(output_dir, "val", "images")
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(train_images_dir, exist_ok=True)
    os.makedirs(val_images_dir, exist_ok=True)

    # Running image counter shared by both splits (see note above).
    count = 0

    # build train dataset: bdd100k train + acdc train + acdc val
    labels = []
    count = _add_bdd100k_split(bdd100k_dir, "train", train_images_dir, labels, count)
    count = _add_acdc_splits(
        acdc_dir,
        ["train", "val", "train_ref", "val_ref"],
        train_images_dir,
        labels,
        count,
    )
    _write_labels(labels, os.path.join(output_dir, "train", "labels.json"))

    # build val dataset: bdd100k val + acdc test
    labels = []
    count = _add_bdd100k_split(bdd100k_dir, "val", val_images_dir, labels, count)
    count = _add_acdc_splits(
        acdc_dir, ["test", "test_ref"], val_images_dir, labels, count
    )
    _write_labels(labels, os.path.join(output_dir, "val", "labels.json"))


if __name__ == "__main__":
    build_swim_dataset()