# Copyright (c) OpenMMLab. All rights reserved.
from typing import List

from mmengine import get_file_backend, list_from_file

from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import CALTECH101_CATEGORIES


@DATASETS.register_module()
class Caltech101(BaseDataset):
    """The Caltech101 Dataset.

    Support the `Caltech101 <https://data.caltech.edu/records/mzrjq-6wc02>`_ Dataset.
    After downloading and decompression, the dataset directory structure is as follows.

    Caltech101 dataset directory: ::

        caltech-101
        ├── 101_ObjectCategories
        │   ├── class_x
        │   │   ├── xx1.jpg
        │   │   ├── xx2.jpg
        │   │   └── ...
        │   ├── class_y
        │   │   ├── yy1.jpg
        │   │   ├── yy2.jpg
        │   │   └── ...
        │   └── ...
        ├── Annotations
        │   ├── class_x
        │   │   ├── xx1.mat
        │   │   └── ...
        │   └── ...
        ├── meta
        │   ├── train.txt
        │   └── test.txt
        └── ...

    Please note that since there is no official split for the training and
    test sets, you can use the train.txt and test.txt provided by us or
    create your own annotation files. Here is the download
    `link <https://download.openmmlab.com/mmpretrain/datasets/caltech_meta.zip>`_
    for the annotations.
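
    Each line of the annotation files is expected to pair an image path
    (relative to ``101_ObjectCategories``) with an integer class label,
    e.g. ``class_x/xx1.jpg 0`` (illustrative values); see
    ``load_data_list`` below.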

    Args:
        data_root (str): The root directory for the Caltech101 dataset.
        split (str, optional): The dataset split, supports "train" and "test".
            Defaults to "train".

    Examples:
        >>> from mmpretrain.datasets import Caltech101
        >>> train_dataset = Caltech101(data_root='data/caltech-101', split='train')
        >>> train_dataset
        Dataset Caltech101
            Number of samples: 3060
            Number of categories: 102
            Root of dataset: data/caltech-101
        >>> test_dataset = Caltech101(data_root='data/caltech-101', split='test')
        >>> test_dataset
        Dataset Caltech101
            Number of samples: 6728
            Number of categories: 102
            Root of dataset: data/caltech-101
    """  # noqa: E501

    METAINFO = {'classes': CALTECH101_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):

        splits = ['train', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but got '{split}'"
        self.split = split

        self.backend = get_file_backend(data_root, enable_singleton=True)
        if split == 'train':
            ann_file = self.backend.join_path('meta', 'train.txt')
        else:
            ann_file = self.backend.join_path('meta', 'test.txt')

        data_prefix = '101_ObjectCategories'
        test_mode = split == 'test'

        super(Caltech101, self).__init__(
            ann_file=ann_file,
            data_root=data_root,
            data_prefix=data_prefix,
            test_mode=test_mode,
            **kwargs)

    def load_data_list(self):
        """Load images and ground truth labels."""
        pairs = list_from_file(self.ann_file)
        data_list = []

        for pair in pairs:
            # Each line pairs a relative image path with an integer label.
            path, gt_label = pair.split()
            img_path = self.backend.join_path(self.img_prefix, path)
            info = dict(img_path=img_path, gt_label=int(gt_label))
            data_list.append(info)

        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body
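

# A minimal usage sketch (illustrative, not part of the upstream module).
# It assumes the directory layout and meta files described in the class
# docstring exist under ``data/caltech-101``.
if __name__ == '__main__':
    dataset = Caltech101(data_root='data/caltech-101', split='train')
    print(dataset)  # summary repr: number of samples, categories, data root
    sample = dataset.get_data_info(0)  # dict with 'img_path' and 'gt_label'
    print(sample['img_path'], sample['gt_label'])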