# Copyright (c) OpenMMLab. All rights reserved. from typing import List import mmengine from mmengine.dataset import BaseDataset from mmengine.fileio import get_file_backend from pycocotools.coco import COCO from mmpretrain.registry import DATASETS @DATASETS.register_module() class NoCaps(BaseDataset): """NoCaps dataset. Args: data_root (str): The root directory for ``data_prefix`` and ``ann_file``.. ann_file (str): Annotation file path. data_prefix (dict): Prefix for data field. Defaults to ``dict(img_path='')``. pipeline (Sequence): Processing pipeline. Defaults to an empty tuple. **kwargs: Other keyword arguments in :class:`BaseDataset`. """ def load_data_list(self) -> List[dict]: """Load data list.""" img_prefix = self.data_prefix['img_path'] with mmengine.get_local_path(self.ann_file) as ann_file: coco = COCO(ann_file) file_backend = get_file_backend(img_prefix) data_list = [] for ann in coco.anns.values(): image_id = ann['image_id'] image_path = file_backend.join_path( img_prefix, coco.imgs[image_id]['file_name']) data_info = { 'image_id': image_id, 'img_path': image_path, 'gt_caption': None } data_list.append(data_info) return data_list