Completed
Push — master ( eb9dd6...a22857 )
by Alexandre M.
02:03
created

hansel.list_children()   D

Complexity

Conditions 8

Size

Total Lines 36

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 8
dl 0
loc 36
rs 4
1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Utilities to make crumbs
6
"""
7
import fnmatch
import operator
import os
import os.path as op

try:
    # The ABCs live in `collections.abc`; the aliases in `collections`
    # were removed in Python 3.10.
    from collections.abc import Mapping
except ImportError:  # pragma: no cover - very old interpreters only
    from collections import Mapping
from functools import partial, reduce
from itertools import product
15
16
17
def remove_duplicates(lst):
    """ Return a sorted list of the unique elements of `lst`.

    Parameters
    ----------
    lst: iterable of any
        The elements must be hashable (they are put in a `set`) and
        mutually orderable (the result is sorted).

    Returns
    -------
    fslst: list
        Sorted list holding each distinct element of `lst` exactly once.
    """
    # `sorted` accepts any iterable, so the set needs no intermediate list.
    return sorted(set(lst))
30
31
32
def remove_ignored(strs, ignore):
    """ Remove from `strs` the items matching any of the `fnmatch` (glob)
    patterns in `ignore` and return the result in a new list.

    Parameters
    ----------
    strs: iterable of str
        Candidate strings. The input is never modified.

    ignore: iterable of str
        Glob patterns, as understood by `fnmatch.fnmatch`.

    Returns
    -------
    kept: list of str
        The items of `strs`, in their original order, that match none of
        the patterns.
    """
    # `list(strs)` instead of `strs.copy()`: tuples and generators have no
    # `.copy()` method, and this guarantees the documented list return type.
    kept = list(strs)
    for pattern in ignore:
        kept = [item for item in kept if not fnmatch.fnmatch(item, pattern)]

    return kept
40
41
42
def list_children(path, just_dirs=False, ignore=None):
    """ Return the immediate elements (files and folders) in `path`.

    Parameters
    ----------
    path: str
        Path to an existing folder or file.

    just_dirs: bool
        If True will return only folders.

    ignore: sequence of str, optional
        Sequence of glob patterns to ignore from the listing.
        `None` or an empty sequence disables the filtering.

    Returns
    -------
    paths: list of str
        If `path` is a folder: the names (not joined with `path`) of its
        entries as given by `os.listdir`.
        If `path` is a file: `[path]`, or `[]` when `just_dirs` is True.

    Raises
    ------
    IOError
        If `path` does not exist.
    """
    if not op.exists(path):
        raise IOError("Expected an existing path, but could not"
                      " find {}.".format(path))

    if op.isfile(path):
        # A file has no children: it is reported as itself unless only
        # folders were requested.
        vals = [] if just_dirs else [path]
    elif just_dirs:
        vals = [d for d in os.listdir(path) if op.isdir(op.join(path, d))]
    else:
        vals = os.listdir(path)

    # `ignore` defaults to None rather than a shared mutable list.
    if ignore:
        vals = remove_ignored(vals, ignore)

    return vals
78
79
80
class ParameterGrid(object):
    """
    Picked from sklearn: https://github.com/scikit-learn/scikit-learn

    Grid of parameters with a discrete number of values for each.
    Can be used to iterate over parameter value combinations with the
    Python built-in function iter.

    Parameters
    ----------
    param_grid : dict of string to sequence, or sequence of such
        The parameter grid to explore, as a dictionary mapping estimator
        parameters to sequences of allowed values.
        An empty dict signifies default parameters.
        A sequence of dicts signifies a sequence of grids to search, and is
        useful to avoid exploring parameter combinations that make no sense
        or have no effect. See the examples below.

    Examples
    --------
    >>> param_grid = {'a': [1, 2], 'b': [True, False]}
    >>> list(ParameterGrid(param_grid)) == (
    ...    [{'a': 1, 'b': True}, {'a': 1, 'b': False},
    ...     {'a': 2, 'b': True}, {'a': 2, 'b': False}])
    True
    >>> grid = [{'kernel': ['linear']}, {'kernel': ['rbf'], 'gamma': [1, 10]}]
    >>> list(ParameterGrid(grid)) == [{'kernel': 'linear'},
    ...                               {'kernel': 'rbf', 'gamma': 1},
    ...                               {'kernel': 'rbf', 'gamma': 10}]
    True
    >>> ParameterGrid(grid)[1] == {'kernel': 'rbf', 'gamma': 1}
    True
    """

    def __init__(self, param_grid):
        if isinstance(param_grid, Mapping):
            # wrap dictionary in a singleton list to support either dict
            # or list of dicts
            param_grid = [param_grid]
        self.param_grid = param_grid

    def __iter__(self):
        """Iterate over the points in the grid.

        Returns
        -------
        params : iterator over dict of string to any
            Yields dictionaries mapping each estimator parameter to one of its
            allowed values.
        """
        for sub_grid in self.param_grid:
            # Always sort the keys of a dictionary, for reproducibility
            items = sorted(sub_grid.items())
            if not items:
                # An empty sub-grid stands for a single point without
                # parameters.
                yield {}
                continue

            keys, values = zip(*items)
            for combination in product(*values):
                yield dict(zip(keys, combination))

    def __len__(self):
        """Number of points on the grid."""
        # Multiply the items of any iterable. Deliberately not called
        # `product`, so it does not shadow `itertools.product`.
        multiply = partial(reduce, operator.mul)
        return sum(multiply(len(v) for v in p.values()) if p else 1
                   for p in self.param_grid)

    def __getitem__(self, ind):
        """Get the parameters that would be ``ind``th in iteration

        Parameters
        ----------
        ind : int
            The iteration index

        Returns
        -------
        params : dict of string to any
            Equal to list(self)[ind]

        Raises
        ------
        IndexError
            If `ind` is beyond the last point of the grid.
        """
        # This is used to make discrete sampling without replacement memory
        # efficient.
        multiply = partial(reduce, operator.mul)

        for sub_grid in self.param_grid:
            # XXX: could memoize information used here
            if not sub_grid:
                # An empty sub-grid contributes exactly one (empty) point.
                if ind == 0:
                    return {}
                ind -= 1
                continue

            # Reverse so most frequent cycling parameter comes first
            keys, values_lists = zip(*sorted(sub_grid.items())[::-1])
            sizes = [len(v_list) for v_list in values_lists]
            total = multiply(sizes)

            if ind >= total:
                # Try the next grid
                ind -= total
                continue

            # Decode `ind` as a mixed-radix number whose digits select one
            # value per parameter, fastest-cycling parameter first.
            out = {}
            for key, v_list, n in zip(keys, values_lists, sizes):
                ind, offset = divmod(ind, n)
                out[key] = v_list[offset]
            return out

        raise IndexError('ParameterGrid index out of range')
187