Spaces:
Runtime error
Runtime error
File size: 5,418 Bytes
3b96cb1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List
import mat4py
from mmengine import get_file_backend
from mmpretrain.registry import DATASETS
from .base_dataset import BaseDataset
from .categories import STANFORDCARS_CATEGORIES
@DATASETS.register_module()
class StanfordCars(BaseDataset):
    """The Stanford Cars Dataset.

    Support the `Stanford Cars Dataset
    <https://ai.stanford.edu/~jkrause/cars/car_dataset.html>`_ Dataset.
    The official website provides two ways to organize the dataset.
    Therefore, after downloading and decompression, the dataset
    directory structure is as follows.

    Stanford Cars dataset directory: ::

        Stanford_Cars
        ├── car_ims
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        └── cars_annos.mat

    or ::

        Stanford_Cars
        ├── cars_train
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        ├── cars_test
        │   ├── 00001.jpg
        │   ├── 00002.jpg
        │   └── ...
        └── devkit
            ├── cars_meta.mat
            ├── cars_train_annos.mat
            ├── cars_test_annos.mat
            ├── cars_test_annos_withlabels.mat
            ├── eval_train.m
            └── train_perfect_preds.txt

    Args:
        data_root (str): The root directory for Stanford Cars dataset.
        split (str, optional): The dataset split, supports "train"
            and "test". Default to "train".

    Examples:
        >>> from mmpretrain.datasets import StanfordCars
        >>> train_dataset = StanfordCars(data_root='data/Stanford_Cars', split='train')
        >>> train_dataset
        Dataset StanfordCars
            Number of samples:  8144
            Number of categories:       196
            Root of dataset:    data/Stanford_Cars
        >>> test_dataset = StanfordCars(data_root='data/Stanford_Cars', split='test')
        >>> test_dataset
        Dataset StanfordCars
            Number of samples:  8041
            Number of categories:       196
            Root of dataset:    data/Stanford_Cars
    """  # noqa: E501

    METAINFO = {'classes': STANFORDCARS_CATEGORIES}

    def __init__(self, data_root: str, split: str = 'train', **kwargs):

        splits = ['train', 'test']
        assert split in splits, \
            f"The split must be one of {splits}, but get '{split}'"
        self.split = split

        test_mode = split == 'test'
        self.backend = get_file_backend(data_root, enable_singleton=True)

        # Detect which of the two official layouts is present: the flat
        # layout ships a single ``cars_annos.mat`` at the dataset root,
        # while the devkit layout keeps per-split annotation files.
        anno_file_path = self.backend.join_path(data_root, 'cars_annos.mat')
        if self.backend.exists(anno_file_path):
            ann_file = 'cars_annos.mat'
            data_prefix = ''
        else:
            if test_mode:
                ann_file = self.backend.join_path(
                    'devkit', 'cars_test_annos_withlabels.mat')
                data_prefix = 'cars_test'
            else:
                ann_file = self.backend.join_path('devkit',
                                                  'cars_train_annos.mat')
                data_prefix = 'cars_train'

            if not self.backend.exists(
                    self.backend.join_path(data_root, ann_file)):
                doc_url = 'https://mmpretrain.readthedocs.io/en/latest/api/datasets.html#stanfordcars'  # noqa: E501
                # Implicit string concatenation instead of a backslash
                # continuation inside the literal, which embedded a long
                # run of indentation spaces into the error message.
                raise RuntimeError(
                    'The dataset is incorrectly organized, please refer '
                    f'to {doc_url} and reorganize your folders.')

        super(StanfordCars, self).__init__(
            ann_file=ann_file,
            data_root=data_root,
            data_prefix=data_prefix,
            test_mode=test_mode,
            **kwargs)

    def load_data_list(self):
        """Load image paths and ground-truth labels from the ``.mat``
        annotation file.

        Returns:
            list[dict]: Each item contains ``img_path`` and ``gt_label``.
        """
        data = mat4py.loadmat(self.ann_file)['annotations']
        data_list = []
        if 'test' in data.keys():
            # first way: ``cars_annos.mat`` covers both splits and marks
            # each sample with a ``test`` flag, so filter by current split.
            img_paths, labels, test = data['relative_im_path'], data[
                'class'], data['test']
            num = len(img_paths)
            assert num == len(labels) == len(test), 'get error ann file'
            for i in range(num):
                if not self.test_mode and test[i] == 1:
                    continue
                if self.test_mode and test[i] == 0:
                    continue
                img_path = self.backend.join_path(self.img_prefix,
                                                  img_paths[i])
                # MATLAB class indices are 1-based; shift to 0-based.
                gt_label = labels[i] - 1
                info = dict(img_path=img_path, gt_label=gt_label)
                data_list.append(info)
        else:
            # second way: split-specific annotation file; every entry
            # already belongs to the current split.
            img_names, labels = data['fname'], data['class']
            num = len(img_names)
            assert num == len(labels), 'get error ann file'
            for i in range(num):
                img_path = self.backend.join_path(self.img_prefix,
                                                  img_names[i])
                # MATLAB class indices are 1-based; shift to 0-based.
                gt_label = labels[i] - 1
                info = dict(img_path=img_path, gt_label=gt_label)
                data_list.append(info)
        return data_list

    def extra_repr(self) -> List[str]:
        """The extra repr information of the dataset."""
        body = [
            f'Root of dataset: \t{self.data_root}',
        ]
        return body
|