Completed
Push — master ( 3e1d4c...f31f72 )
by Bart
27s
created

SVHN.__init__()   A

Complexity

Conditions 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 5
rs 9.4285
1
# -*- coding: utf-8 -*-
2
from fuel.datasets import H5PYDataset
3
from fuel.transformers.defaults import uint8_pixels_to_floatX
4
from fuel.utils import find_in_data_path
5
6
7
class SVHN(H5PYDataset):
    """Street View House Numbers (SVHN) dataset backed by an HDF5 file.

    SVHN is a real-world image dataset aimed at developing machine
    learning and object recognition algorithms that need little data
    preprocessing or formatting. It is similar in flavor to MNIST (the
    images are small cropped digits) but offers an order of magnitude
    more labeled data (over 600,000 digit images) and stems from a
    considerably harder, unsolved real-world problem: recognizing
    digits and numbers in natural scene images. The images are house
    numbers taken from Google Street View.

    Parameters
    ----------
    which_format : {1, 2}
        SVHN format to load. Format 1 holds the full numbers, format 2
        holds cropped digits. The value is substituted into the HDF5
        filename template.
    which_sets : tuple of str
        Splits to load. Valid values are 'train', 'test' and 'extra',
        i.e. the training set (73,257 examples), the test set
        (26,032 examples) and the extra set (531,131 examples).
        SVHN ships without a validation set; a training/validation
        split is usually made with the `subset` argument.
    **kwargs
        Forwarded unchanged to the parent class constructor.

    """
    # Filename template; the placeholder is filled with `which_format`.
    _filename = 'svhn_format_{}.hdf5'
    # Built by `uint8_pixels_to_floatX` for the 'features' source.
    default_transformers = uint8_pixels_to_floatX(('features',))

    def __init__(self, which_format, which_sets, **kwargs):
        # Must be stored before anything else: `filename` reads it.
        self.which_format = which_format
        data_file = find_in_data_path(self.filename)
        super(SVHN, self).__init__(
            file_or_path=data_file, which_sets=which_sets, **kwargs)

    @property
    def filename(self):
        """Name of the HDF5 file for the selected SVHN format."""
        template = self._filename
        return template.format(self.which_format)
46