"""Download the COCO images that contain a configured set of categories."""

import os

from pycocotools.coco import COCO
import requests
import yaml

# Seconds to wait on a single HTTP request before giving up, so that one
# stalled connection cannot hang the whole download run.
_REQUEST_TIMEOUT_S = 30


def get_images(cfg_path: str = 'cfg/cfg.yml') -> None:
    """Download the COCO images matching the categories named in a config.

    The YAML file at ``cfg_path`` must contain a ``dataset`` mapping with
    these keys:

    * ``annotations_folder_path`` and ``annotations_fname``: concatenated
      (plain string concatenation, no separator is inserted) to form the
      path of the COCO annotations JSON file.
    * ``classes``: list of category names to query, e.g. ``['person']``.
    * ``img_folder_path``: prefix prepended (again by plain concatenation)
      to each image's ``file_name`` to form the output path.

    Args:
        cfg_path: Path to the YAML configuration file.
            Defaults to 'cfg/cfg.yml'.

    Raises:
        requests.HTTPError: If the server answers an image request with an
            error status. The error body is not written to disk.
        requests.Timeout: If a request exceeds ``_REQUEST_TIMEOUT_S``.
    """
    # Context manager so the config file handle is closed even on a parse
    # error.
    with open(cfg_path, encoding='utf-8') as cfg_file:
        # NOTE(review): FullLoader can construct more than plain data types.
        # If the config file can ever come from an untrusted source, switch
        # to yaml.safe_load.
        cfg = yaml.load(cfg_file, Loader=yaml.FullLoader)
    dataset_cfg = cfg['dataset']

    # Instantiate COCO from the annotations JSON path.
    coco = COCO(
        dataset_cfg['annotations_folder_path']
        + dataset_cfg['annotations_fname']
    )

    # Resolve the category names of interest to ids, then to image records.
    cat_ids = coco.getCatIds(catNms=dataset_cfg['classes'])
    img_ids = coco.getImgIds(catIds=cat_ids)
    images = coco.loadImgs(img_ids)

    # Paths are built by concatenation (not os.path.join) so that existing
    # configs keep resolving to exactly the same files.
    img_prefix = dataset_cfg['img_folder_path']

    # Make sure the destination directory exists before the first write.
    out_dir = os.path.dirname(img_prefix)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # One session for all downloads, instead of a fresh connection per image.
    with requests.Session() as session:
        for im in images:
            response = session.get(im['coco_url'], timeout=_REQUEST_TIMEOUT_S)
            # Fail loudly rather than saving an HTTP error page as an image.
            response.raise_for_status()
            with open(img_prefix + im['file_name'], 'wb') as handler:
                handler.write(response.content)


if __name__ == '__main__':
    get_images(cfg_path='cfg/cfg.yml')