|
1
|
|
|
# -*- coding: utf-8 -*- |
|
2
|
|
|
from fuel.datasets import H5PYDataset |
|
3
|
|
|
from fuel.transformers.defaults import uint8_pixels_to_floatX |
|
4
|
|
|
from fuel.utils import find_in_data_path |
|
5
|
|
|
|
|
6
|
|
|
|
|
7
|
|
|
class SVHN(H5PYDataset):
    """Street View House Numbers (SVHN) dataset stored as an HDF5 file.

    SVHN is a real-world image dataset intended for developing machine
    learning and object recognition algorithms while requiring little
    data preprocessing and formatting. It is similar in flavor to MNIST
    (the images are small cropped digits), but it contains an order of
    magnitude more labeled data (over 600,000 digit images) and stems
    from a significantly harder, unsolved, real-world problem:
    recognizing digits and numbers in natural scene images. The images
    are house numbers taken from Google Street View.

    Parameters
    ----------
    which_format : {1, 2}
        Format 1 contains the full numbers, format 2 contains cropped
        digits. The value is substituted into the data file name
        ``svhn_format_{}.hdf5``.
    which_sets : tuple of str
        Which split to load. Valid values are 'train', 'test' and
        'extra', corresponding to the training set (73,257 examples),
        the test set (26,032 examples) and the extra set (531,131
        examples). SVHN has no validation set; usually you will create
        your own training/validation split using the `subset` argument.

    """
    # File name template; the placeholder receives ``which_format``.
    _filename = 'svhn_format_{}.hdf5'
    # Default transformers, built by ``uint8_pixels_to_floatX`` for the
    # 'features' source.
    default_transformers = uint8_pixels_to_floatX(('features',))

    def __init__(self, which_format, which_sets, **kwargs):
        """Store the requested format and initialize the parent dataset.

        Any additional keyword arguments are forwarded unchanged to the
        ``H5PYDataset`` constructor.
        """
        # Must be assigned first: the ``filename`` property reads it.
        self.which_format = which_format
        data_file = find_in_data_path(self.filename)
        super(SVHN, self).__init__(
            file_or_path=data_file, which_sets=which_sets, **kwargs)

    @property
    def filename(self):
        """Data file name for the format chosen at construction time."""
        template = self._filename
        return template.format(self.which_format)
|
46
|
|
|
|