from fuel.datasets import H5PYDataset
from fuel.transformers.defaults import uint8_pixels_to_floatX
from fuel.utils import find_in_data_path


class CIFAR10(H5PYDataset):
    """The CIFAR10 dataset of natural images.

    This dataset is a labeled subset of the ``80 million tiny images``
    dataset [TINY]. It consists of 60,000 32 x 32 colour images in 10
    classes, with 6,000 images per class. There are 50,000 training
    images and 10,000 test images [CIFAR10].

    .. [CIFAR10] Alex Krizhevsky, *Learning Multiple Layers of Features
       from Tiny Images*, technical report, 2009.

    Parameters
    ----------
    which_sets : tuple of str
        Which split to load. Valid values are 'train' and 'test',
        corresponding to the training set (50,000 examples) and the test
        set (10,000 examples). Note that CIFAR10 does not have a
        validation set; usually you will create your own
        training/validation split using the `subset` argument.
    \*\*kwargs
        Additional keyword arguments, forwarded unchanged to the
        :class:`H5PYDataset` constructor. If `load_in_memory` is not
        given, it defaults to ``True`` for this dataset.

    """
    filename = 'cifar10.hdf5'
    default_transformers = uint8_pixels_to_floatX(('features',))

    def __init__(self, which_sets, **kwargs):
        # Only fill in the default; an explicit caller-supplied
        # `load_in_memory` value is left untouched.
        kwargs.setdefault('load_in_memory', True)
        super(CIFAR10, self).__init__(
            file_or_path=find_in_data_path(self.filename),
            which_sets=which_sets, **kwargs)