Completed
Push — master ( 861f20...93886c )
by Alexandre M.
54s
created

hansel.Crumb.ls()   F

Complexity

Conditions 13

Size

Total Lines 61

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 13
dl 0
loc 61
rs 2.9618

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex methods like hansel.Crumb.ls() often do a lot of different things. To break such a method down, we need to identify a cohesive component within its class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Crumb class: the smart path model class.
6
"""
7
8
import os.path     as op
9
from   copy        import deepcopy
10
from   collections import OrderedDict, Mapping, Sequence
11
from   pathlib     import Path
12
from   functools   import partial
13
14
from   six import string_types
15
16
from   .utils import remove_duplicates, list_children
17
from   ._utils import (_get_path, _arg_name,
18
                       _is_crumb_arg, _replace,
19
                       _split_exists, _split,
20
                       _touch, has_crumbs, is_valid,
21
                       #_arg_format,
22
                       )
23
24
25
class Crumb(object):
26
    """ The crumb path model class.
27
    Parameters
28
    ----------
29
    crumb_path: str
30
        A file or folder path with crumb arguments. See Examples.
31
32
    ignore_list: sequence of str
33
        A list of `fnmatch` patterns of filenames to be ignored.
34
35
    Examples
36
    --------
37
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
38
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
39
    """
40
    # symbols indicating start and end of a crumb argument
41
    _start_end_syms = ('{', '}')
42
43
    # specify partial functions from _utils with _arg_start_sym and _arg_end_sym
44
    # everything would be much simpler if I hardcoded these symbols but I still
45
    # feel that this flexibility is nice to have.
46
    # _arg_format   = partial(_arg_format,   start_sym=_arg_start_sym, end_sym=_arg_end_sym)
47
    _is_crumb_arg = partial(_is_crumb_arg, start_end_syms=_start_end_syms)
48
    _arg_name     = partial(_arg_name,     start_end_syms=_start_end_syms)
49
    is_valid      = partial(is_valid,      start_end_syms=_start_end_syms)
50
    has_crumbs    = partial(has_crumbs,    start_end_syms=_start_end_syms)
51
    _replace      = partial(_replace,      start_end_syms=_start_end_syms)
52
    _split        = partial(_split,        start_end_syms=_start_end_syms)
53
    _touch        = partial(_touch,        start_end_syms=_start_end_syms)
54
    _split_exists = partial(_split_exists, start_end_syms=_start_end_syms)
55
56
57
    def __init__(self, crumb_path, ignore_list=()):
        """ Store the crumb path and the ignore list, then parse the path.

        Parameters
        ----------
        crumb_path: str
            A file or folder path with crumb arguments. It is passed through
            `_get_path` before being stored.

        ignore_list: sequence of str
            A list of `fnmatch` patterns of filenames to be ignored.
        """
        self._path = _get_path(crumb_path)
        self._ignore = ignore_list
        self._argidx = OrderedDict()
        # validate the path and fill `_argidx`
        self._update()
62
63
    @property
64
    def path(self):
65
        """Return the current crumb path string."""
66
        return self._path
67
68
    @path.setter
69
    def path(self, value):
        """ Replace the crumb path string and re-parse the internal members.

        Parameters
        ----------
        value: str
            A file or folder path with crumb arguments. See Examples in class docstring.

        Raises
        ------
        ValueError: if `_update` rejects the new path. Note that `self._path`
        has already been overwritten at that point.
        """
        self._path = value
        # re-validate and rebuild the argument index for the new path
        self._update()
78
79
    def _check(self):
        """ Raise a ValueError if `is_valid` rejects the current crumb path."""
        if self.is_valid(self._path):
            return

        raise ValueError("The current crumb path has errors, got {}.".format(self.path))
82
83
    def _update(self):
        """ Re-synchronise the private members with the current crumb path.

        The steps run in this order: reset the argument index, validate the
        path (raises ValueError when it is not valid) and rebuild the index.
        """
        self._clean()
        self._check()
        self._set_argidx()
90
91
    def _clean(self):
92
        """ Clean up the private utility members, i.e., _argidx. """
93
        self._argidx = OrderedDict()
94
95
    @classmethod
96
    def copy(cls, crumb):
        """ Return a new instance built from the given `crumb`.

        A Crumb is rebuilt from its path and shares the same ignore list
        object; a string is handed over to `from_path`.

        Parameters
        ----------
        crumb: str or Crumb

        Returns
        -------
        copy: Crumb

        Raises
        ------
        TypeError: if `crumb` is neither an instance of this class nor a string.
        """
        if isinstance(crumb, cls):
            return cls(crumb._path, ignore_list=crumb._ignore)

        if isinstance(crumb, string_types):
            return cls.from_path(crumb)

        raise TypeError("Expected a Crumb or a str to copy, got {}.".format(type(crumb)))
112
113
    def _set_argidx(self):
        """ Fill `self._argidx` with the mapping arg_name -> index.

        The index is the position of the argument within the list returned
        by `_path_split()`. Components that `_is_crumb_arg` does not accept
        are skipped.
        """
        for position, part in enumerate(self._path_split()):
            if not self._is_crumb_arg(part):
                continue

            name = self._arg_name(part)
            self._argidx[name] = position
121
122
    def _find_arg(self, arg_name):
123
        """ Return the index in the current path of the crumb
124
        argument with name `arg_name`.
125
        """
126
        return self._argidx.get(arg_name, -1)
127
128
    def isabs(self):
        """ Return True if the current crumb path is absolute, False otherwise.

        Only the part of the path that precedes the first argument start
        symbol is tested, using `os.path.isabs`.

        Raises
        ------
        ValueError: if the current crumb path is not valid.
        """
        if not self.is_valid(self._path):
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))

        opening, _ = self._start_end_syms
        # everything before the first crumb argument
        prefix = self._path.partition(opening)[0]
        return op.isabs(prefix)
140
141
    def abspath(self, first_is_basedir=False):
        """ Return a copy of `self` with an absolute crumb path.

        If the current crumb is already absolute a deep copy of it is
        returned. Otherwise a new crumb is built from `_abspath()`, keeping
        the ignore list of the current one.

        Parameters
        ----------
        first_is_basedir: bool
            If True and the current crumb path starts with a crumb argument,
            the first argument will be replaced by the absolute path to the current dir,
            otherwise the absolute path to the current dir will be added as a prefix.

        Returns
        -------
        abs_crumb: Crumb

        Raises
        ------
        ValueError: if the current crumb path is not valid.
        """
        if not self.is_valid(self._path):
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))

        if self.isabs():
            return deepcopy(self)

        # Build the instance directly: going through `copy(<str>)` would
        # create the new crumb with the default (empty) ignore list.
        return type(self)(self._abspath(first_is_basedir=first_is_basedir),
                          ignore_list=self._ignore)
164
165
    def _path_split(self):
166
        return self._path.split(op.sep)
167
168
    def _abspath(self, first_is_basedir=False):
        """ Return the absolute path of the current crumb path.

        A path without crumb arguments is resolved with `os.path.abspath`.
        Otherwise the current directory is only added when the first path
        component is a crumb argument.

        Parameters
        ----------
        first_is_basedir: bool
            Only relevant when the first path component is a crumb argument:
            if True that argument is replaced by the absolute path to the
            current dir, otherwise the absolute path to the current dir is
            added as a prefix.

        Returns
        -------
        abspath: str
        """
        if not self.has_crumbs(self._path):
            return op.abspath(self._path)

        splt = self._path_split()
        first, rest = splt[0], splt[1:]

        path = []
        if self._is_crumb_arg(first):
            path.append(op.abspath(op.curdir))
            if not first_is_basedir:
                path.append(first)
        else:
            # A leading component that is not a crumb argument must never be
            # dropped, whatever the value of `first_is_basedir`.
            # NOTE(review): a relative path starting with a plain folder is
            # returned without the current dir prefix; `_arg_values` indexes
            # this result with `_argidx` positions, so this is kept as is.
            path.append(first)

        path.extend(rest)
        return op.sep.join(path)
197
198
    def split(self):
        """ Split the current crumb path with the `_split` helper.

        Returns
        -------
        crumbs: list of str
            Sub-strings of the current crumb path where the path parts are
            separated from the crumb arguments.
        """
        pieces = self._split(self._path)
        return pieces
207
208
    @classmethod
209
    def from_path(cls, crumb_path):
        """ Create an instance of Crumb out of `crumb_path`.

        Parameters
        ----------
        crumb_path: str or Crumb or pathlib.Path

        Returns
        -------
        path: Crumb

        Raises
        ------
        TypeError: if `crumb_path` is not one of the accepted types.
        """
        if isinstance(crumb_path, cls):
            return cls.copy(crumb_path)

        if isinstance(crumb_path, Path):
            # `copy` only accepts a Crumb or a str and would raise a
            # TypeError for a Path, so convert it to a string here.
            return cls(str(crumb_path))

        if isinstance(crumb_path, string_types):
            return cls(crumb_path)

        raise TypeError("Expected a `val` to be a `str`, got {}.".format(type(crumb_path)))
226
227
    def _lastarg(self):
228
        """ Return the name and idx of the last argument."""
229
        for arg, idx in reversed(list(self._argidx.items())):
230
            return arg, idx
231
232
    def _firstarg(self):
233
        """ Return the name and idx of the first argument."""
234
        for arg, idx in self._argidx.items():
235
            return arg, idx
236
237
    def _is_firstarg(self, arg_name):
238
        """ Return True if `arg_name` is the first argument."""
239
        # Take into account that self._argidx is OrderedDict
240
        return arg_name == self._firstarg()[0]
241
242
    def _arg_values(self, arg_name, arg_values=None):
        """ List the values found by `list_children` for the crumb argument
        named `arg_name`.

        `arg_values` holds, for each combination already found, the
        (name, value) pairs of the arguments that come before `arg_name`
        in the path. Each combination is replaced into the crumb path and
        the first piece returned by `_split` is the folder that gets listed.

        Parameters
        ----------
        arg_name: str

        arg_values: list of lists of 2-tuples
            Each item must support `dict(item)` and `item + [...]`.

        Returns
        -------
        vals: list of lists of 2-tuples
            The input combinations extended with one (arg_name, value) pair
            for every child found.

        Raises
        ------
        ValueError: if `arg_values` is None and `arg_name` is not the
        first crumb argument in self._path
        """
        if arg_values is None and not self._is_firstarg(arg_name):
            raise ValueError("Cannot get the list of values for {} if"
                             " the previous arguments are not filled"
                             " in `paths`.".format(arg_name))

        arg_pos = self._find_arg(arg_name)
        parts = self._abspath().split(op.sep)

        # only the last path component may be a file, any other one is a folder
        just_dirs = arg_pos != len(parts) - 1

        if arg_values is None:
            base = op.sep.join(parts[:arg_pos])
            children = list_children(base, just_dirs=just_dirs, ignore=self._ignore)
            return [[(arg_name, child)] for child in children]

        vals = []
        for aval in arg_values:
            # the part of the crumb path that is already specified
            filled = self._replace(self._path, **dict(aval))
            path = self._split(filled)[0]

            subpaths = list_children(path, just_dirs=just_dirs, ignore=self._ignore)

            # one extended combination per child found
            vals.extend([aval + [(arg_name, sp)] for sp in subpaths])

        return vals
298
299
    def replace(self, **kwargs):
        """ Return a new Crumb with the crumb arguments in
        `kwargs` replaced by its values.

        The new crumb keeps the ignore list of the current one.

        Parameters
        ----------
        kwargs: strings
            Argument name -> value to put in its place.

        Returns
        -------
        crumb: Crumb

        Raises
        ------
        KeyError: if any key of `kwargs` is not an argument of this crumb.
        """
        for arg_name in kwargs:
            if arg_name not in self._argidx:
                raise KeyError("Expected `arg_name` to be one of ({}),"
                               " got {}.".format(list(self._argidx), arg_name))

        # No intermediate copy is needed: `_replace` works on the path string.
        new_path = self._replace(self._path, **kwargs)
        return Crumb(new_path, ignore_list=self._ignore)
318
319
    def _arg_deps(self, arg_name):
320
        """ Return a subdict of `self._argidx` with the
321
         values from the crumb arguments that come before
322
         `arg_name` in the crumb path.
323
        Parameters
324
        ----------
325
        arg_name: str
326
327
        Returns
328
        -------
329
        arg_deps: Mapping[str, int]
330
        """
331
        argidx = self._find_arg(arg_name)
332
        return OrderedDict([(arg, idx) for arg, idx in self._argidx.items() if idx <= argidx])
333
334
    def values_map(self, arg_name, check_exists=False):
        """ Return a list of tuples of crumb arguments with their values.

        The values are collected argument by argument, in path order, from
        the first argument up to `arg_name`.

        Parameters
        ----------
        arg_name: str

        check_exists: bool
            If True, only the combinations whose resulting crumb reports
            `exists()` are kept.

        Returns
        -------
        values_map: list of lists of 2-tuples
        """
        values_map = None
        for dep_name in self._arg_deps(arg_name):
            values_map = self._arg_values(dep_name, values_map)

        if not check_exists:
            return values_map

        crumbs = [self.from_path(path) for path in self._build_paths(values_map)]
        return [args for args, crumb in zip(values_map, crumbs) if crumb.exists()]
359
360
    def _build_paths(self, values_map):
        """ Build one path per item of `values_map` by replacing its
        (name, value) pairs into the current crumb path.

        Parameters
        ----------
        values_map: list of sequences of 2-tuple

        Returns
        -------
        paths: list of str
        """
        paths = []
        for pairs in values_map:
            replacements = dict(pairs)
            paths.append(self._replace(self._path, **replacements))

        return paths
371
372
    def ls(self, arg_name, fullpath=True, make_crumbs=True, check_exists=False):
373
        """
374
        Return the list of values for the argument crumb `arg_name`.
375
        This will also unfold any other argument crumb that appears before in the
376
        path.
377
        Parameters
378
        ----------
379
        arg_name: str
380
            Name of the argument crumb to be unfolded.
381
382
        fullpath: bool
383
            If True will build the full path of the crumb path, will also append
384
            the rest of crumbs not unfolded.
385
            If False will only return the values for the argument with name
386
            `arg_name`.
387
388
        make_crumbs: bool
389
            If `fullpath` and `make_crumbs` is True will create a Crumb for
390
            each element of the result.
391
392
        check_exists: bool
393
            If True will return only str, Crumb or Path if it exists
394
            in the file path, otherwise it may create file paths
395
            that don't have to exist.
396
397
        Returns
398
        -------
399
        values: list of str or Crumb
400
401
        Examples
402
        --------
403
        >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
404
        >>> user_folders = cr.ls('user_folder',fullpath=True,make_crumbs=True)
405
        """
406
        if arg_name not in self._argidx:
407
            raise ValueError("Expected `arg_name` to be one of ({}),"
408
                             " got {}.".format(list(self._argidx), arg_name))
409
410
        start_sym, _ = self._start_end_syms
411
412
        # if the first chunk of the path is a parameter, I am not interested in this (for now)
413
        if self._path.startswith(start_sym):
414
            raise NotImplementedError("Can't list paths that starts"
415
                                      " with an argument.")
416
417
        if make_crumbs and not fullpath:
418
            raise ValueError("`make_crumbs` can only work if `fullpath` is also True.")
419
420
        values_map = self.values_map(arg_name, check_exists=check_exists)
421
422
        if not fullpath and not make_crumbs:
423
            paths = [dict(val)[arg_name] for val in values_map]
424
425
        elif fullpath and not make_crumbs:
426
            paths = sorted(self._build_paths(values_map))
427
428
        elif fullpath and make_crumbs:
429
            paths = sorted(self._build_paths(values_map))
430
            paths = [self.from_path(path) for path in paths]
431
432
        return paths
433
434
    def _remaining_deps(self, arg_names):
435
        """ Return the name of the arguments that are dependencies of `arg_names`.
436
        Parameters
437
        ----------
438
        arg_names: Sequence[str]
439
440
        Returns
441
        -------
442
        rem_deps: Sequence[str]
443
        """
444
        started = False
445
        rem_deps = []
446
        for an in reversed(list(self._argidx.keys())):  # take into account that argidx is ordered
447
            if an in arg_names:
448
                started = True
449
            else:
450
                if started:
451
                    rem_deps.append(an)
452
453
        return rem_deps
454
455
    def touch(self):
        """ Call the `_touch` helper on the current crumb path.

        NOTE(review): the previous documentation stated that this creates a
        leaf directory and all intermediate ones using the non crumbed part
        of the path -- confirm against `_utils._touch`. This method takes no
        parameters.

        Returns
        -------
        nupath: str
            Whatever `_touch` returns; presumably the new path created.
        """
        created = self._touch(self._path)
        return created
473
474
    def joinpath(self, suffix):
        """ Return a new Crumb with the `suffix` path appended.

        The new crumb is parsed from scratch, so crumb arguments present in
        `suffix` become arguments of the result. The ignore list of the
        current crumb is kept.

        Parameters
        ----------
        suffix: str

        Returns
        -------
        cr: Crumb
        """
        joined = op.join(self._path, suffix)
        # pass the ignore list along, otherwise the result would fall back
        # to the default (empty) one
        return Crumb(joined, ignore_list=self._ignore)
486
487
    def exists(self):
        """ Return True if the current crumb path is a possibly existing path,
        False otherwise.

        A path without crumb arguments is checked directly on the file
        system. Otherwise the values of the last argument are listed and
        every resulting path is checked with `_split_exists`.

        NOTE(review): when the listing is empty the result is True, because
        `all` of an empty list is True -- confirm this is intended.

        Returns
        -------
        exists: bool
        """
        if not self.has_crumbs(self._path):
            target = str(self)
            return op.exists(target) or op.islink(target)

        base = self.split()[0]
        if not op.exists(base):
            return False

        last_name, _ = self._lastarg()
        candidates = self.ls(last_name, fullpath=True, make_crumbs=False, check_exists=False)

        checks = [self._split_exists(candidate) for candidate in candidates]
        return all(checks)
504
505
    def has_files(self):
506
        """ Return True if the current crumb path has any file in its
507
        possible paths.
508
        Returns
509
        -------
510
        has_files: bool
511
        """
512
        if not op.exists(self.split()[0]):
513
            return False
514
515
        last, _ = self._lastarg()
516
        paths = self.ls(last, fullpath=True, make_crumbs=True, check_exists=True)
517
518
        return any([op.isfile(str(lp)) for lp in paths])
519
520
    def unfold(self):
        """ List the existing paths obtained by unfolding every argument up
        to the last one.

        This is `ls` on the last argument with `fullpath`, `make_crumbs`
        and `check_exists` all set to True.

        Returns
        -------
        paths: list of Crumb
        """
        last_name = self._lastarg()[0]
        return self.ls(last_name, fullpath=True, make_crumbs=True, check_exists=True)
527
528
    def __getitem__(self, arg_name):
        """ Return the existing values of the crumb argument `arg_name`
        without removing duplicates.

        This is `ls` with `fullpath` and `make_crumbs` set to False and
        `check_exists` set to True.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        values: list of str
        """
        values = self.ls(arg_name, fullpath=False, make_crumbs=False, check_exists=True)
        return values
540
541
    def __setitem__(self, key, value):
542
        if key not in self._argidx:
543
            raise KeyError("Expected `arg_name` to be one of ({}),"
544
                           " got {}.".format(list(self._argidx), key))
545
546
        self._path = self._replace(self._path, **{key: value})
547
        self._update()
548
549
    def __ge__(self, other):
550
        return self._path >= str(other)
551
552
    def __le__(self, other):
553
        return self._path <= str(other)
554
555
    def __gt__(self, other):
556
        return self._path > str(other)
557
558
    def __lt__(self, other):
559
        return self._path < str(other)
560
561
    def __hash__(self):
562
        return self._path.__hash__()
563
564
    def __contains__(self, item):
565
        return item in self._argidx
566
567
    def __repr__(self):
568
        return '{}("{}")'.format(__class__.__name__, self._path)
569
570
    def __str__(self):
571
        return str(self._path)
572
573
    def __eq__(self, other):
574
        """ Return True if `self` and `other` are equal, False otherwise.
575
        Parameters
576
        ----------
577
        other: Crumb
578
579
        Returns
580
        -------
581
        is_equal: bool
582
        """
583
        if self._path != other._path:
584
            return False
585
586
        if self._argidx != other._argidx:
587
            return False
588
589
        if self._ignore != other._ignore:
590
            return False
591
592
        return True
593
594