File size: 5,418 Bytes
3b96cb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List

import mat4py
from mmengine import get_file_backend

from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import STANFORDCARS_CATEGORIES


@DATASETS.register_module()
class StanfordCars(BaseDataset):
    """The Stanford Cars Dataset.

    Support the `Stanford Cars Dataset <https://ai.stanford.edu/~jkrause/cars/car_dataset.html>`_ Dataset.
    The official website provides two ways to organize the dataset.
    Therefore, after downloading and decompression, the dataset directory structure is as follows.

    Stanford Cars dataset directory: ::

        Stanford_Cars
        ├── car_ims
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        └── cars_annos.mat

    or ::

        Stanford_Cars
        ├── cars_train
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        ├── cars_test
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        └── devkit
            ├── cars_meta.mat
            ├── cars_train_annos.mat
            ├── cars_test_annos.mat
            ├── cars_test_annos_withlabels.mat
            ├── eval_train.m
            └── train_perfect_preds.txt

    Args:
        data_root (str): The root directory for Stanford Cars dataset.
        split (str, optional): The dataset split, supports "train"
            and "test". Defaults to "train".
        **kwargs: Other keyword arguments forwarded unchanged to
            :class:`BaseDataset`.

    Examples:
        >>> from mmpretrain.datasets import StanfordCars
        >>> train_dataset = StanfordCars(data_root='data/Stanford_Cars', split='train')
        >>> train_dataset
        Dataset StanfordCars
            Number of samples:  8144
            Number of categories:       196
            Root of dataset:    data/Stanford_Cars
        >>> test_dataset = StanfordCars(data_root='data/Stanford_Cars', split='test')
        >>> test_dataset
        Dataset StanfordCars
            Number of samples:  8041
            Number of categories:       196
            Root of dataset:    data/Stanford_Cars
    """  # noqa: E501

    METAINFO = {'classes': STANFORDCARS_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):
        """Detect the on-disk layout and initialise the base dataset.

        If ``cars_annos.mat`` exists directly under ``data_root`` the
        single-annotation-file layout is used; otherwise the ``devkit``
        layout is assumed and the annotation file for the requested split
        must exist.

        Raises:
            AssertionError: If ``split`` is neither "train" nor "test".
            RuntimeError: If neither supported layout is found under
                ``data_root``.
        """
        splits = ['train', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        test_mode = split == 'test'
        self.backend = get_file_backend(data_root, enable_singleton=True)

        anno_file_path = self.backend.join_path(data_root, 'cars_annos.mat')
        if self.backend.exists(anno_file_path):
            # First layout: one annotation file covering both splits.
            ann_file = 'cars_annos.mat'
            data_prefix = ''
        else:
            # Second layout: per-split image folders plus a devkit folder.
            if test_mode:
                ann_file = self.backend.join_path(
                    'devkit', 'cars_test_annos_withlabels.mat')
                data_prefix = 'cars_test'
            else:
                ann_file = self.backend.join_path('devkit',
                                                  'cars_train_annos.mat')
                data_prefix = 'cars_train'

            if not self.backend.exists(
                    self.backend.join_path(data_root, ann_file)):
                doc_url = 'https://mmpretrain.readthedocs.io/en/latest/api/datasets.html#stanfordcars'  # noqa: E501
                # Implicit concatenation keeps source indentation out of
                # the user-facing message.
                raise RuntimeError(
                    'The dataset is incorrectly organized, please '
                    f'refer to {doc_url} and reorganize your folders.')

        super().__init__(
            ann_file=ann_file,
            data_root=data_root,
            data_prefix=data_prefix,
            test_mode=test_mode,
            **kwargs)

    def load_data_list(self):
        """Load image paths and labels from the annotation ``.mat`` file.

        Returns:
            list[dict]: One dict per sample with keys ``img_path`` and
            ``gt_label``.
        """
        # NOTE(review): ``self.ann_file`` is presumably resolved against
        # ``data_root`` by the base class - confirm.
        data = mat4py.loadmat(self.ann_file)['annotations']

        data_list = []
        if 'test' in data:
            # first way: a single file holds both splits, and the ``test``
            # flag of each entry tells which split it belongs to.
            img_paths, labels, test = data['relative_im_path'], data[
                'class'], data['test']
            assert len(img_paths) == len(labels) == len(test), \
                'get error ann file'
            # Entries carrying this flag value belong to the other split.
            skipped_flag = 0 if self.test_mode else 1
            for rel_path, label, is_test in zip(img_paths, labels, test):
                if is_test == skipped_flag:
                    continue
                img_path = self.backend.join_path(self.img_prefix, rel_path)
                # Stored class ids are shifted down by one (presumably they
                # are 1-based in the annotation file).
                data_list.append(dict(img_path=img_path, gt_label=label - 1))
        else:
            # second way: the annotation file only covers the current split.
            img_names, labels = data['fname'], data['class']
            assert len(img_names) == len(labels), 'get error ann file'
            for img_name, label in zip(img_names, labels):
                img_path = self.backend.join_path(self.img_prefix, img_name)
                data_list.append(dict(img_path=img_path, gt_label=label - 1))

        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body