Completed
Push — master ( 6f5a37...36096f )
by Tinghui
49s
created

CasasFuel.get_dataset()   B

Complexity

Conditions 3

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
c 1
b 0
f 0
dl 0
loc 24
rs 8.9713
1
import os
2
import pickle
3
import logging
4
from fuel.datasets import H5PYDataset
5
6
logger = logging.getLogger(__name__)


class CasasFuel(object):
    """Retrieve CASAS smart home data as a fuel dataset object.

    Args:
        dir_name (:obj:`str`):
            Directory path that contains the HDF5 dataset file ``data.hdf5``
            and the complementary dataset information file ``info.pkl``.

    Attributes:
        data_filename (:obj:`str`): Path to the ``data.hdf5`` dataset file.
        info (:obj:`dict`): Complementary dataset information loaded from
            ``info.pkl``. It is an empty dict when ``info.pkl`` does not
            exist. Keys read by this class:

            * ``split_sets``: names of the available splits.
            * ``index_to_feature``: its length is the input dimension.
            * ``index_to_activity``: its length is the output dimension.
    """
    def __init__(self, dir_name):
        # Lazy %-style arguments: the message is only built when DEBUG is on.
        logger.debug('Load Casas H5PYDataset from %s', dir_name)
        # os.path.join copes with a trailing separator or an empty dir_name,
        # where plain '+' concatenation would yield 'dir//file' or '/file'.
        self.data_filename = os.path.join(dir_name, 'data.hdf5')
        # Always define ``info`` so a missing info.pkl does not leave the
        # instance without the attribute.
        self.info = {}
        info_filename = os.path.join(dir_name, 'info.pkl')
        if os.path.isfile(info_filename):
            # NOTE: pickle.load can execute arbitrary code; only use dataset
            # directories that come from a trusted source.
            # The context manager closes the file even if unpickling fails.
            with open(info_filename, 'rb') as info_file:
                self.info = pickle.load(info_file)
        else:
            logger.warning('Dataset information file %s not found',
                           info_filename)

    def get_dataset(self, which_sets, load_in_memory=False, **kwargs):
        """Return the fuel dataset object for the splits in ``which_sets``.

        Split names missing from ``info['split_sets']`` are reported through
        the module logger only; the dataset object is still constructed.

        Args:
            which_sets (:obj:`tuple` of :obj:`str`): Names of the splits to
                load. Valid values are determined by the ``info.pkl`` loaded.
                You can get the list of split set names by
                :meth:`get_set_list()`.
                Usually, if the dataset is split by weeks, the split name is
                in the form of ``week <num>``. If the dataset is split by
                days, the split name is in the form of ``day <num>``.
            load_in_memory (:obj:`bool`, Optional): Default to False.
                Whether to load the data in main memory.
            **kwargs: Extra keyword arguments forwarded unchanged to
                :class:`fuel.datasets.h5py.H5PYDataset`.

        Returns:
            :class:`fuel.datasets.base.Dataset`: A Fuel dataset object
                created by :class:`fuel.datasets.h5py.H5PYDataset`
        """
        # Check if sets exist as split name in metadata
        known_sets = self.info['split_sets']
        for set_name in which_sets:
            if set_name not in known_sets:
                # Passing set_name as a logging argument avoids the TypeError
                # that '%' formatting raises when set_name is a tuple.
                logger.error('set %s not found in splits', set_name)
        # Load specified splits and return
        return H5PYDataset(file_or_path=self.data_filename,
                           which_sets=which_sets,
                           load_in_memory=load_in_memory, **kwargs)

    def get_set_list(self):
        """Get the split set list.

        Returns:
            :obj:`tuple` of :obj:`str`: The split set names stored under
                ``info['split_sets']``
        """
        return self.info['split_sets']

    def get_input_dims(self):
        """Get the dimension of features.

        Returns:
            :obj:`int` : the input feature length, i.e. the number of entries
                in ``info['index_to_feature']``
        """
        return len(self.info['index_to_feature'])

    def get_output_dims(self):
        """Get the dimension of target indices.

        Returns:
            :obj:`int` : the number of entries in
                ``info['index_to_activity']``
        """
        return len(self.info['index_to_activity'])
80