Completed
Push — master ( 8f9d5c...861f20 )
by Alexandre M.
05:33
created

hansel.Crumb.ls()   D

Complexity

Conditions 11

Size

Total Lines 59

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 11
dl 0
loc 59
rs 4.2188

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

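Commonly applied refactorings include: Extract Method and, if many parameters or temporary variables are present, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.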

Complexity

Complex code units like the method hansel.Crumb.ls() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it or within its class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Crumb class: the smart path model class.
6
"""
7
8
import os.path     as op
9
from   copy        import deepcopy
10
from   collections import OrderedDict, Mapping, Sequence
11
from   pathlib     import Path
12
from   functools   import partial
13
14
from   six import string_types
15
16
from   .utils import remove_duplicates, list_children
17
from   ._utils import (_get_path, _arg_name,
18
                       _is_crumb_arg, _replace,
19
                       _split_exists, _split,
20
                       _touch, has_crumbs, is_valid,
21
                       #_arg_format,
22
                       )
23
24
25
class Crumb(object):
26
    """ The crumb path model class.
27
    Parameters
28
    ----------
29
    crumb_path: str
30
        A file or folder path with crumb arguments. See Examples.
31
32
    ignore_list: sequence of str
33
        A list of `fnmatch` patterns of filenames to be ignored.
34
35
    Examples
36
    --------
37
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
38
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
39
    """
40
    # symbols indicating start and end of a crumb argument
41
    _start_end_syms = ('{', '}')
42
43
    # specify partial functions from _utils with _arg_start_sym and _arg_end_sym
44
    # everything would be much simpler if I hardcoded these symbols but I still
45
    # feel that this flexibility is nice to have.
46
    # _arg_format   = partial(_arg_format,   start_sym=_arg_start_sym, end_sym=_arg_end_sym)
47
    _is_crumb_arg = partial(_is_crumb_arg, start_end_syms=_start_end_syms)
48
    _arg_name     = partial(_arg_name,     start_end_syms=_start_end_syms)
49
    is_valid      = partial(is_valid,      start_end_syms=_start_end_syms)
50
    has_crumbs    = partial(has_crumbs,    start_end_syms=_start_end_syms)
51
    _replace      = partial(_replace,      start_end_syms=_start_end_syms)
52
    _split        = partial(_split,        start_end_syms=_start_end_syms)
53
    _touch        = partial(_touch,        start_end_syms=_start_end_syms)
54
    _split_exists = partial(_split_exists, start_end_syms=_start_end_syms)
55
56
57
    def __init__(self, crumb_path, ignore_list=()):
        """ Build a crumb out of `crumb_path`.

        Parameters
        ----------
        crumb_path: str
            A file or folder path with crumb arguments.

        ignore_list: sequence of str
            Patterns of filenames to be ignored. Stored as given and handed
            to `list_children` whenever argument values are listed.
        """
        # `_get_path` (from `._utils`) produces the value kept as the path
        path = _get_path(crumb_path)
        self._path = path
        self._ignore = ignore_list
        self._argidx = OrderedDict()
        # validate the path and index its crumb arguments
        self._update()
62
63
    @property
64
    def path(self):
65
        """Return the current crumb path string."""
66
        return self._path
67
68
    @path.setter
69
    def path(self, value):
70
        """ Set the current crumb path string and updates the internal members.
71
        Parameters
72
        ----------
73
        value: str
74
            A file or folder path with crumb arguments. See Examples in class docstring.
75
        """
76
        self._path = value
77
        self._update()
78
79
    def _check(self):
80
        if not self.is_valid(self._path):
81
            raise ValueError("The current crumb path has errors, got {}.".format(self.path))
82
83
    def _update(self):
84
        """ Clean up, parse the current crumb path and fill the internal
85
        members for functioning."""
86
        self._clean()
87
        self._check()
88
        self._set_argidx()
89
        # self._set_replace_func()
90
91
    def _clean(self):
92
        """ Clean up the private utility members, i.e., _argidx. """
93
        self._argidx = OrderedDict()
94
95
    @classmethod
    def copy(cls, crumb):
        """ Return a new crumb built from the given `crumb`.

        Parameters
        ----------
        crumb: str or Crumb

        Returns
        -------
        copy: Crumb

        Raises
        ------
        TypeError: if `crumb` is neither an instance of this class nor a string.
        """
        if isinstance(crumb, cls):
            # rebuild from the stored path, handing over the same ignore list
            return cls(crumb._path, ignore_list=crumb._ignore)

        if isinstance(crumb, string_types):
            return cls.from_path(crumb)

        raise TypeError("Expected a Crumb or a str to copy, got {}.".format(type(crumb)))
112
113
    def _set_argidx(self):
114
        """ Initialize the self._argidx dict. It holds arg_name -> index.
115
        The index is the position in the whole `_path.split(op.sep)` where each argument is.
116
        """
117
        fs = self._path_split()
118
        for idx, f in enumerate(fs):
119
            if self._is_crumb_arg(f):
120
                self._argidx[self._arg_name(f)] = idx
121
122
    def _find_arg(self, arg_name):
123
        """ Return the index in the current path of the crumb
124
        argument with name `arg_name`.
125
        """
126
        return self._argidx.get(arg_name, -1)
127
128
    def isabs(self):
129
        """ Return True if the current crumb path has an
130
        absolute path, False otherwise.
131
        This means that if it is valid and does not start with a `op.sep` character
132
        or hard disk letter.
133
        """
134
        if not self.is_valid(self._path):
135
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))
136
137
        start_sym, _ = self._start_end_syms
138
        subp = self._path.split(start_sym)[0]
139
        return op.isabs(subp)
140
141
    def abspath(self, first_is_basedir=False):
142
        """ Return a copy of `self` with an absolute crumb path.
143
        Add as prefix the absolute path to the current directory if the current
144
        crumb is not absolute.
145
        Parameters
146
        ----------
147
        first_is_basedir: bool
148
            If True and the current crumb path starts with a crumb argument and first_is_basedir,
149
            the first argument will be replaced by the absolute path to the current dir,
150
            otherwise the absolute path to the current dir will be added as a prefix.
151
152
153
        Returns
154
        -------
155
        abs_crumb: Crumb
156
        """
157
        if not self.is_valid(self._path):
158
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))
159
160
        if self.isabs():
161
            return deepcopy(self)
162
163
        return self.copy(self._abspath(first_is_basedir=first_is_basedir))
164
165
    def _path_split(self):
166
        return self._path.split(op.sep)
167
168
    def _abspath(self, first_is_basedir=False):
169
        """ Return the absolute path of the current crumb path.
170
        Parameters
171
        ----------
172
        first_is_basedir: bool
173
            If True and the current crumb path starts with a crumb argument and first_is_basedir,
174
            the first argument will be replaced by the absolute path to the current dir,
175
            otherwise the absolute path to the current dir will be added as a prefix.
176
177
178
        Returns
179
        -------
180
        abspath: str
181
        """
182
        if not self.has_crumbs(self._path):
183
             return op.abspath(self._path)
184
185
        splt = self._path_split()
186
        path = []
187
        if self._is_crumb_arg(splt[0]):
188
            path.append(op.abspath(op.curdir))
189
190
        if not first_is_basedir:
191
            path.append(splt[0])
192
193
        if splt[1:]:
194
            path.extend(splt[1:])
195
196
        return op.sep.join(path)
197
198
    def split(self):
199
        """ Return a list of sub-strings of the current crumb path where the
200
            path parts are separated from the crumb arguments.
201
202
        Returns
203
        -------
204
        crumbs: list of str
205
        """
206
        return self._split(self._path)
207
208
    @classmethod
209
    def from_path(cls, crumb_path):
210
        """ Create an instance of Crumb out of `crumb_path`.
211
        Parameters
212
        ----------
213
        val: str or Crumb or pathlib.Path
214
215
        Returns
216
        -------
217
        path: Crumb
218
        """
219
        if isinstance(crumb_path, (cls, Path)):
220
            return crumb_path
221
222
        if isinstance(crumb_path, string_types):
223
            return cls(crumb_path)
224
        else:
225
            raise TypeError("Expected a `val` to be a `str`, got {}.".format(type(crumb_path)))
226
227
    # def _set_replace_func(self):
228
    #     """ Set the fastest replace algorithm depending on how
229
    #     many arguments the path has."""
230
    #     self._replace = self._replace2
231
    #     if len(self._argidx) > 5:
232
    #         self._replace = self._replace1
233
234
    # def _replace2(self, start_sym='{', end_sym='}', **kwargs):
235
    #
236
    #     if start_sym != '{' or end_sym != '}':
237
    #         raise NotImplementedError
238
    #
239
    #     if not kwargs:
240
    #         return self._path
241
    #
242
    #     args = {v: self._arg_format(v) for v in self._argidx}
243
    #
244
    #     for k in kwargs:
245
    #         if k not in args:
246
    #             raise KeyError("Could not find argument {}"
247
    #                            " in `path` {}.".format(k, self._path))
248
    #
249
    #         args[k] = kwargs[k]
250
    #
251
    #     return self._path.format_map(args)
252
253
    def _lastarg(self):
254
        """ Return the name and idx of the last argument."""
255
        for arg, idx in reversed(list(self._argidx.items())):
256
            return arg, idx
257
258
    def _firstarg(self):
259
        """ Return the name and idx of the first argument."""
260
        for arg, idx in self._argidx.items():
261
            return arg, idx
262
263
    def _is_firstarg(self, arg_name):
264
        """ Return True if `arg_name` is the first argument."""
265
        # Take into account that self._argidx is OrderedDict
266
        return arg_name == self._firstarg()[0]
267
268
    def _arg_values(self, arg_name, arg_values=None):
269
        """ Return the existing values in the file system for the crumb argument
270
        with name `arg_name`.
271
        The `arg_values` must be a sequence with the tuples with valid values of the dependent
272
        (previous in the path) crumb arguments.
273
        The format of `arg_values` work in such a way that `self._path.format(dict(arg_values[0]))`
274
        would give me a valid path or crumb.
275
        Parameters
276
        ----------
277
        arg_name: str
278
279
        arg_values: list of tuples
280
281
        Returns
282
        -------
283
        vals: list of tuples
284
285
        Raises
286
        ------
287
        ValueError: if `arg_values` is None and `arg_name` is not the
288
        first crumb argument in self._path
289
290
        IOError: if this crosses to any path that is non-existing.
291
        """
292
        if arg_values is None and not self._is_firstarg(arg_name):
293
            raise ValueError("Cannot get the list of values for {} if"
294
                             " the previous arguments are not filled"
295
                             " in `paths`.".format(arg_name))
296
297
        aidx = self._find_arg(arg_name)
298
299
        # check if the path is absolute, do it absolute
300
        apath = self._abspath()
301
        splt = apath.split(op.sep)
302
303
        if aidx == len(splt) - 1:  # this means we have to list files too
304
            just_dirs = False
305
        else:  # this means we have to list folders
306
            just_dirs = True
307
308
        vals = []
309
        if arg_values is None:
310
            base = op.sep.join(splt[:aidx])
311
            vals = [[(arg_name, val)] for val in list_children(base, just_dirs=just_dirs, ignore=self._ignore)]
312
        else:
313
            for aval in arg_values:
314
                #  create the part of the crumb path that is already specified
315
                path = self._split(self._replace(self._path, **dict(aval)))[0]
316
317
                #  list the children of `path`
318
                subpaths = list_children(path, just_dirs=just_dirs, ignore=self._ignore)
319
320
                #  extend `val` tuples with the new list of values for `aval`
321
                vals.extend([aval + [(arg_name, sp)] for sp in subpaths])
322
323
        return vals
324
325
    def replace(self, **kwargs):
326
        """ Return a copy of self with the crumb arguments in
327
        `kwargs` replaced by its values.
328
        Parameters
329
        ----------
330
        kwargs: strings
331
332
        Returns
333
        -------
334
        crumb:
335
        """
336
        for arg_name in kwargs:
337
            if arg_name not in self._argidx:
338
                raise KeyError("Expected `arg_name` to be one of ({}),"
339
                                 " got {}.".format(list(self._argidx), arg_name))
340
341
        cr = self.copy(self)
342
        cr._path = cr._replace(self._path, **kwargs)
343
        return Crumb.from_path(cr._path)
344
345
    def _arg_deps(self, arg_name):
346
        """ Return a subdict of `self._argidx` with the
347
         values from the crumb arguments that come before
348
         `arg_name` in the crumb path.
349
        Parameters
350
        ----------
351
        arg_name: str
352
353
        Returns
354
        -------
355
        arg_deps: Mapping[str, int]
356
        """
357
        argidx = self._find_arg(arg_name)
358
        return OrderedDict([(arg, idx) for arg, idx in self._argidx.items() if idx <= argidx])
359
360
    def values_map(self, arg_name, check_exists=False):
361
        """ Return a list of tuples of crumb arguments with their values.
362
363
        Parameters
364
        ----------
365
        arg_name: str
366
367
        check_exists: bool
368
369
        Returns
370
        -------
371
        values_map: list of lists of 2-tuples
372
        """
373
        arg_deps = self._arg_deps(arg_name)
374
        values_map = None
375
        for arg in arg_deps:
376
            values_map = self._arg_values(arg, values_map)
377
378
        if check_exists:
379
            paths = [self.from_path(path) for path in self._build_paths(values_map)]
380
            values_map_checked = [args for args, path in zip(values_map, paths) if path.exists()]
381
        else:
382
            values_map_checked = values_map
383
384
        return values_map_checked
385
386
    def _build_paths(self, values_map):
387
        """ Return a list of paths from each tuple of args from `values_map`
388
        Parameters
389
        ----------
390
        values_map: list of sequences of 2-tuple
391
392
        Returns
393
        -------
394
        paths: list of str
395
        """
396
        return [self._replace(self._path, **dict(val)) for val in values_map]
397
398
    def ls(self, arg_name, fullpath=True, make_crumbs=True, check_exists=False):
399
        """
400
        Return the list of values for the argument crumb `arg_name`.
401
        This will also unfold any other argument crumb that appears before in the
402
        path.
403
        Parameters
404
        ----------
405
        arg_name: str
406
            Name of the argument crumb to be unfolded.
407
408
        fullpath: bool
409
            If True will build the full path of the crumb path, will also append
410
            the rest of crumbs not unfolded.
411
            If False will only return the values for the argument with name
412
            `arg_name`.
413
414
        make_crumbs: bool
415
            If `fullpath` and `make_crumbs` is True will create a Crumb for
416
            each element of the result.
417
418
        check_exists: bool
419
            If True will return only str, Crumb or Path if it exists
420
            in the file path, otherwise it may create file paths
421
            that don't have to exist.
422
423
        Returns
424
        -------
425
        values: list of str or Crumb
426
427
        Examples
428
        --------
429
        >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
430
        >>> user_folders = cr.ls('user_folder',fullpath=True,make_crumbs=True)
431
        """
432
        if arg_name not in self._argidx:
433
            raise ValueError("Expected `arg_name` to be one of ({}),"
434
                             " got {}.".format(list(self._argidx), arg_name))
435
436
        start_sym, _ = self._start_end_syms
437
438
        # if the first chunk of the path is a parameter, I am not interested in this (for now)
439
        if self._path.startswith(start_sym):
440
            raise NotImplementedError("Can't list paths that starts"
441
                                      " with an argument.")
442
443
        if make_crumbs and not fullpath:
444
            raise ValueError("`make_crumbs` can only work if `fullpath` is also True.")
445
446
        values_map = self.values_map(arg_name, check_exists=check_exists)
447
448
        if not fullpath and not make_crumbs:
449
            paths = [dict(val)[arg_name] for val in values_map]
450
        else:
451
            paths = sorted(self._build_paths(values_map))
452
453
        if fullpath and make_crumbs:
454
            paths = sorted([self.from_path(path) for path in paths])
455
456
        return paths
457
458
    def _remaining_deps(self, arg_names):
459
        """ Return the name of the arguments that are dependencies of `arg_names`.
460
        Parameters
461
        ----------
462
        arg_names: Sequence[str]
463
464
        Returns
465
        -------
466
        rem_deps: Sequence[str]
467
        """
468
        started = False
469
        rem_deps = []
470
        for an in reversed(list(self._argidx.keys())):  # take into account that argidx is ordered
471
            if an in arg_names:
472
                started = True
473
            else:
474
                if started:
475
                    rem_deps.append(an)
476
477
        return rem_deps
478
479
    def touch(self):
480
        """ Create a leaf directory and all intermediate ones
481
        using the non crumbed part of `crumb_path`.
482
        If the target directory already exists, raise an IOError
483
        if exist_ok is False. Otherwise no exception is raised.
484
        Parameters
485
        ----------
486
        crumb_path: str
487
488
        exist_ok: bool
489
            Default = True
490
491
        Returns
492
        -------
493
        nupath: str
494
            The new path created.
495
        """
496
        return self._touch(self._path)
497
498
    def joinpath(self, suffix):
        """ Return a new Crumb whose path is the current crumb path joined
        with `suffix` by `op.join`.
        The new crumb is built from scratch, so any crumb arguments in
        `suffix` are part of it.

        Parameters
        ----------
        suffix: str

        Returns
        -------
        cr: Crumb
        """
        joined = op.join(self._path, suffix)
        return Crumb(joined)
510
511
    def exists(self):
512
        """ Return True if the current crumb path is a possibly existing path,
513
        False otherwise.
514
        Returns
515
        -------
516
        exists: bool
517
        """
518
        if not self.has_crumbs(self._path):
519
            return op.exists(str(self)) or op.islink(str(self))
520
521
        if not op.exists(self.split()[0]):
522
            return False
523
524
        last, _ = self._lastarg()
525
        paths = self.ls(last, fullpath=True, make_crumbs=False, check_exists=False)
526
527
        return all([self._split_exists(lp) for lp in paths])
528
529
    def has_files(self):
530
        """ Return True if the current crumb path has any file in its
531
        possible paths.
532
        Returns
533
        -------
534
        has_files: bool
535
        """
536
        if not op.exists(self.split()[0]):
537
            return False
538
539
        last, _ = self._lastarg()
540
        paths = self.ls(last, fullpath=True, make_crumbs=True, check_exists=True)
541
542
        return any([op.isfile(str(lp)) for lp in paths])
543
544
    def unfold(self):
545
        """ Return a list of all the existing paths until the last crumb argument.
546
        Returns
547
        -------
548
        paths: list of pathlib.Path
549
        """
550
        return self.ls(self._lastarg()[0], fullpath=True, make_crumbs=True, check_exists=True)
551
552
    def __getitem__(self, arg_name):
553
        """ Return the existing values of the crumb argument `arg_name`
554
        without removing duplicates.
555
        Parameters
556
        ----------
557
        arg_name: str
558
559
        Returns
560
        -------
561
        values: list of str
562
        """
563
        return self.ls(arg_name, fullpath=False, make_crumbs=False, check_exists=True)
564
565
    def __setitem__(self, key, value):
566
        if key not in self._argidx:
567
            raise KeyError("Expected `arg_name` to be one of ({}),"
568
                           " got {}.".format(list(self._argidx), key))
569
570
        self._path = self._replace(self._path, **{key: value})
571
        self._update()
572
573
    def __ge__(self, other):
574
        return self._path >= str(other)
575
576
    def __le__(self, other):
577
        return self._path <= str(other)
578
579
    def __gt__(self, other):
580
        return self._path > str(other)
581
582
    def __lt__(self, other):
583
        return self._path < str(other)
584
585
    def __hash__(self):
586
        return self._path.__hash__()
587
588
    def __contains__(self, item):
589
        return item in self._argidx
590
591
    def __repr__(self):
592
        return '{}("{}")'.format(__class__.__name__, self._path)
593
594
    def __str__(self):
595
        return str(self._path)
596
597
    def __eq__(self, other):
598
        """ Return True if `self` and `other` are equal, False otherwise.
599
        Parameters
600
        ----------
601
        other: Crumb
602
603
        Returns
604
        -------
605
        is_equal: bool
606
        """
607
        if self._path != other._path:
608
            return False
609
610
        if self._argidx != other._argidx:
611
            return False
612
613
        if self._ignore != other._ignore:
614
            return False
615
616
        return True
617
618