Completed
Push — master ( 160c42...efd5b4 )
by Alexandre M.
56s
created

hansel.Crumb._arg_values()   C

Complexity

Conditions 8

Size

Total Lines 56

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 8
dl 0
loc 56
rs 6.1256

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

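Commonly applied refactorings include Extract Method and, when many parameters or temporary variables get in the way, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.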

1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Crumb class: the smart path model class.
6
"""
7
8
import os.path     as op
9
from   copy        import deepcopy
10
from   collections import OrderedDict, Mapping, Sequence
11
from   pathlib     import Path
12
from   functools   import partial
13
14
from   six import string_types
15
16
from   .utils import list_children
17
from   ._utils import (_get_path, _arg_name,
18
                       _is_crumb_arg, _replace,
19
                       _split_exists, _split,
20
                       _touch, has_crumbs, is_valid,
21
                       #_arg_format,
22
                       )
23
24
25
class Crumb(object):
    """ The crumb path model class.

    A crumb path is a file or folder path where some of its `op.sep`
    separated components are crumb arguments, i.e., names enclosed by the
    symbols in `_start_end_syms`.

    Parameters
    ----------
    crumb_path: str
        A file or folder path with crumb arguments. See Examples.

    ignore_list: sequence of str
        A list of `fnmatch` patterns of filenames to be ignored.

    Examples
    --------
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
    """
    # symbols indicating start and end of a crumb argument
    _start_end_syms = ('{', '}')

    # Specialize the helpers from `_utils` with the start and end symbols.
    # Everything would be much simpler if these symbols were hardcoded, but
    # this flexibility is nice to have.
    # The partials are wrapped in `staticmethod` so they are never bound to the
    # instance when looked up through `self`: Python 3.13+ warns about partial
    # objects used as class attributes because they will become method
    # descriptors.
    _is_crumb_arg = staticmethod(partial(_is_crumb_arg, start_end_syms=_start_end_syms))
    _arg_name     = staticmethod(partial(_arg_name,     start_end_syms=_start_end_syms))
    is_valid      = staticmethod(partial(is_valid,      start_end_syms=_start_end_syms))
    has_crumbs    = staticmethod(partial(has_crumbs,    start_end_syms=_start_end_syms))
    _replace      = staticmethod(partial(_replace,      start_end_syms=_start_end_syms))
    _split        = staticmethod(partial(_split,        start_end_syms=_start_end_syms))
    _touch        = staticmethod(partial(_touch,        start_end_syms=_start_end_syms))
    _split_exists = staticmethod(partial(_split_exists, start_end_syms=_start_end_syms))

    def __init__(self, crumb_path, ignore_list=()):
        self._path   = _get_path(crumb_path)
        self._argidx = OrderedDict()  # in which order the crumb argument appears
        self._argval = {}  # what is the value of the argument in the current path
        self._ignore = ignore_list
        self._update()

    @property
    def path(self):
        """Return the current crumb path string."""
        return self._path

    @path.setter
    def path(self, value):
        """ Set the current crumb path string and updates the internal members.

        Parameters
        ----------
        value: str
            A file or folder path with crumb arguments. See Examples in class docstring.
        """
        self._path = value
        self._update()

    def _check(self):
        """ Raise a ValueError if `is_valid` rejects the current crumb path."""
        if not self.is_valid(self._path):
            raise ValueError("The current crumb path has errors, got {}.".format(self.path))

    def _update(self):
        """ Clean up, parse the current crumb path and fill the internal
        members for functioning."""
        self._clean()
        self._check()
        self._set_argidx()

    def _clean(self):
        """ Clean up the private utility members, i.e., _argidx. """
        self._argidx = OrderedDict()

    @classmethod
    def copy(cls, crumb):
        """ Return a deep copy of the given `crumb`.

        Parameters
        ----------
        crumb: str or Crumb

        Returns
        -------
        copy: Crumb

        Raises
        ------
        TypeError: if `crumb` is neither an instance of `cls` nor a string.
        """
        if isinstance(crumb, cls):
            nucr = cls(crumb._path, ignore_list=crumb._ignore)
            nucr._argval = deepcopy(crumb._argval)
            return nucr

        if isinstance(crumb, string_types):
            return cls.from_path(crumb)

        raise TypeError("Expected a Crumb or a str to copy, got {}.".format(type(crumb)))

    def _set_argidx(self):
        """ Initialize the self._argidx dict. It holds arg_name -> index.
        The index is the position in the whole `_path.split(op.sep)` where each argument is.
        """
        for idx, chunk in enumerate(self._path_split()):
            if self._is_crumb_arg(chunk):
                self._argidx[self._arg_name(chunk)] = idx

    def _find_arg(self, arg_name):
        """ Return the index in the current path of the crumb
        argument with name `arg_name`, or -1 if there is no such argument.
        """
        return self._argidx.get(arg_name, -1)

    def isabs(self):
        """ Return True if the current crumb path has an
        absolute path, False otherwise.
        The path is considered absolute if the part of it that comes before the
        first crumb argument start symbol is absolute according to `op.isabs`.

        Raises
        ------
        ValueError: if the current crumb path is not valid.
        """
        if not self.is_valid(self._path):
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))

        start_sym, _ = self._start_end_syms
        subp = self._path.split(start_sym)[0]
        return op.isabs(subp)

    def abspath(self, first_is_basedir=False):
        """ Return a copy of `self` with an absolute crumb path.
        Add as prefix the absolute path to the current directory if the current
        crumb is not absolute.

        Parameters
        ----------
        first_is_basedir: bool
            If True and the current crumb path starts with a crumb argument,
            the first argument will be replaced by the absolute path to the current dir,
            otherwise the absolute path to the current dir will be added as a prefix.

        Returns
        -------
        abs_crumb: Crumb

        Raises
        ------
        ValueError: if the current crumb path is not valid.
        """
        # `isabs` validates the path and raises the ValueError for us.
        if self.isabs():
            return deepcopy(self)

        # keep the ignore list and the argument values, as the deep copy above does
        nucr = type(self)(self._abspath(first_is_basedir=first_is_basedir),
                          ignore_list=self._ignore)
        nucr._argval = deepcopy(self._argval)
        return nucr

    def _path_split(self):
        """ Return the current crumb path split by `op.sep`."""
        return self._path.split(op.sep)

    def _abspath(self, first_is_basedir=False):
        """ Return the absolute path of the current crumb path.

        Parameters
        ----------
        first_is_basedir: bool
            If True and the current crumb path starts with a crumb argument,
            the first argument will be replaced by the absolute path to the current dir,
            otherwise the absolute path to the current dir will be added as a prefix.

        Returns
        -------
        abspath: str
        """
        if not self.has_crumbs(self._path):
            return op.abspath(self._path)

        # `op.abspath` would normalize the whole string, crumb arguments
        # included, so only the part before the first argument is checked.
        start_sym, _ = self._start_end_syms
        if op.isabs(self._path.split(start_sym)[0]):
            return self._path

        parts = self._path_split()
        if first_is_basedir and self._is_crumb_arg(parts[0]):
            parts = parts[1:]  # the current dir takes the place of the first argument

        # strip the trailing separator (current dir is a root) to avoid doubling it
        curdir = op.abspath(op.curdir).rstrip(op.sep)
        return op.sep.join([curdir] + parts) or op.sep

    def split(self):
        """ Return a list of sub-strings of the current crumb path where the
            path parts are separated from the crumb arguments.

        Returns
        -------
        crumbs: list of str
        """
        return self._split(self._path)

    @classmethod
    def from_path(cls, crumb_path):
        """ Create an instance of Crumb out of `crumb_path`.

        Parameters
        ----------
        crumb_path: str or Crumb or pathlib.Path

        Returns
        -------
        path: Crumb

        Raises
        ------
        TypeError: if `crumb_path` is not a str, a Crumb or a pathlib.Path.
        """
        if isinstance(crumb_path, cls):
            return cls.copy(crumb_path)

        # `copy` only accepts Crumbs and strings, so convert the Path here.
        if isinstance(crumb_path, Path):
            return cls(str(crumb_path))

        if isinstance(crumb_path, string_types):
            return cls(crumb_path)

        raise TypeError("Expected `crumb_path` to be a `str`, a `Crumb` or a "
                        "`pathlib.Path`, got {}.".format(type(crumb_path)))

    def _lastarg(self):
        """ Return the name and idx of the last argument,
        or None if the path has no arguments."""
        items = list(self._argidx.items())  # self._argidx is an OrderedDict
        return items[-1] if items else None

    def _firstarg(self):
        """ Return the name and idx of the first argument,
        or None if the path has no arguments."""
        for arg, idx in self._argidx.items():
            return arg, idx
        return None

    def _is_firstarg(self, arg_name):
        """ Return True if `arg_name` is the first argument.
        Return False if the path has no arguments."""
        # Take into account that self._argidx is OrderedDict
        first = self._firstarg()
        return first is not None and arg_name == first[0]

    def _arg_values(self, arg_name, arg_values=None):
        """ Return the existing values in the file system for the crumb argument
        with name `arg_name`.
        The `arg_values` must be a sequence with the tuples with valid values of the dependent
        (previous in the path) crumb arguments.
        The format of `arg_values` work in such a way that `self._path.format(dict(arg_values[0]))`
        would give me a valid path or crumb.

        Parameters
        ----------
        arg_name: str

        arg_values: list of sequences of 2-tuples

        Returns
        -------
        vals: list of lists of 2-tuples

        Raises
        ------
        ValueError: if `arg_values` is None and `arg_name` is not the
        first crumb argument in self._path

        IOError: if this crosses to any path that is non-existing.
        """
        if arg_values is None and not self._is_firstarg(arg_name):
            raise ValueError("Cannot get the list of values for {} if"
                             " the previous arguments are not filled"
                             " in `paths`.".format(arg_name))

        aidx = self._find_arg(arg_name)
        parts = self._path_split()

        # if the argument is the last path component we have to list files too,
        # otherwise only folders
        just_dirs = aidx != len(parts) - 1

        if arg_values is None:
            # `aidx` indexes `parts`, so the base folder is built from `parts`
            # itself and not from a re-split absolute path.
            head = parts[:aidx]
            if head == ['']:
                base = op.sep  # the argument hangs directly from the root
            else:
                # no leading components: list the current directory
                base = op.sep.join(head) or op.abspath(op.curdir)

            children = list_children(base, just_dirs=just_dirs, ignore=self._ignore)
            return [[(arg_name, val)] for val in children]

        vals = []
        for aval in arg_values:
            #  create the part of the crumb path that is already specified
            path = self._split(self._replace(self._path, **dict(aval)))[0]

            #  list the children of `path`
            subpaths = list_children(path, just_dirs=just_dirs, ignore=self._ignore)

            #  extend `vals` with the new list of values for `aval`
            vals.extend([list(aval) + [(arg_name, sp)] for sp in subpaths])

        return vals

    def _check_argidx(self, arg_names):
        """ Raise a KeyError if any of the arguments in arg_names is not a crumb
        argument name in self path.

        Parameters
        ----------
        arg_names: sequence of str
            Names of crumb arguments

        Raises
        ------
        KeyError
        """
        if not set(arg_names).issubset(set(self._argidx.keys())):
            raise KeyError("Expected `arg_names` to be a subset of ({}),"
                           " got {}.".format(list(self._argidx.keys()), arg_names))

    def setitems(self, **kwargs):
        """ Set the crumb arguments in path to the given values in kwargs and update
        self accordingly.

        Parameters
        ----------
        kwargs: strings

        Returns
        -------
        crumb: Crumb
            `self`, after the replacement.

        Raises
        ------
        KeyError: if any name in `kwargs` is not a crumb argument of the path.
        """
        self._check_argidx(kwargs.keys())

        # the `path` setter already calls `self._update()`
        self.path = self._replace(self._path, **kwargs)
        self._argval.update(**kwargs)

        return self

    def replace(self, **kwargs):
        """ Return a copy of self with the crumb arguments in
        `kwargs` replaced by its values.

        Parameters
        ----------
        kwargs: strings

        Returns
        -------
        crumb: Crumb
        """
        cr = self.copy(self)
        return cr.setitems(**kwargs)

    def _arg_deps(self, arg_name):
        """ Return a subdict of `self._argidx` with `arg_name` and the
        crumb arguments that come before it in the crumb path.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        arg_deps: Mapping[str, int]
        """
        argidx = self._find_arg(arg_name)
        return OrderedDict([(arg, idx) for arg, idx in self._argidx.items() if idx <= argidx])

    def values_map(self, arg_name, check_exists=False):
        """ Return a list of tuples of crumb arguments with their values.

        Parameters
        ----------
        arg_name: str

        check_exists: bool
            If True, only the value combinations whose built path `exists()`
            are returned.

        Returns
        -------
        values_map: list of lists of 2-tuples
        """
        values_map = None
        # unfold the arguments in path order, each one using the values of the previous ones
        for arg in self._arg_deps(arg_name):
            values_map = self._arg_values(arg, values_map)

        if not check_exists:
            return values_map

        paths = [self.from_path(path) for path in self._build_paths(values_map)]
        return [args for args, path in zip(values_map, paths) if path.exists()]

    def _build_paths(self, values_map, make_crumbs=False):
        """ Return a list of paths from each tuple of args from `values_map`

        Parameters
        ----------
        values_map: list of sequences of 2-tuple

        make_crumbs: bool
            If `make_crumbs` is True will create a Crumb for
            each element of the result.

        Returns
        -------
        paths: list of str or list of Crumb
        """
        if make_crumbs:
            return [self.replace(**dict(val)) for val in values_map]

        return [self._replace(self._path, **dict(val)) for val in values_map]

    def ls(self, arg_name, fullpath=True, make_crumbs=True, check_exists=False):
        """ Return the list of values for the argument crumb `arg_name`.
        This will also unfold any other argument crumb that appears before in the
        path.

        Parameters
        ----------
        arg_name: str
            Name of the argument crumb to be unfolded.

        fullpath: bool
            If True will build the full path of the crumb path, will also append
            the rest of crumbs not unfolded.
            If False will only return the values for the argument with name
            `arg_name`.

        make_crumbs: bool
            If `fullpath` and `make_crumbs` is True will create a Crumb for
            each element of the result.

        check_exists: bool
            If True will return only str, Crumb or Path if it exists
            in the file path, otherwise it may create file paths
            that don't have to exist.

        Returns
        -------
        values: list of str or Crumb

        Raises
        ------
        KeyError: if `arg_name` is not a crumb argument of the path.

        NotImplementedError: if the path starts with a crumb argument.

        ValueError: if `make_crumbs` is True and `fullpath` is False.

        Examples
        --------
        >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
        >>> user_folders = cr.ls('user_folder',fullpath=True,make_crumbs=True)
        """
        self._check_argidx([arg_name])

        start_sym, _ = self._start_end_syms

        # if the first chunk of the path is a parameter, I am not interested in this (for now)
        if self._path.startswith(start_sym):
            raise NotImplementedError("Can't list paths that start with an argument.")

        if make_crumbs and not fullpath:
            raise ValueError("`make_crumbs` can only work if `fullpath` is also True.")

        values_map = self.values_map(arg_name, check_exists=check_exists)

        if fullpath:
            return sorted(self._build_paths(values_map, make_crumbs=make_crumbs))

        return [dict(val)[arg_name] for val in values_map]

    def _remaining_deps(self, arg_names):
        """ Return the name of the arguments that are dependencies of `arg_names`.
        These are the arguments that are not in `arg_names` and appear in the
        path before any argument of `arg_names`, from last to first.

        Parameters
        ----------
        arg_names: Sequence[str]

        Returns
        -------
        rem_deps: Sequence[str]
        """
        started = False
        rem_deps = []
        for an in reversed(list(self._argidx.keys())):  # take into account that argidx is ordered
            if an in arg_names:
                started = True
            elif started:
                rem_deps.append(an)

        return rem_deps

    def touch(self):
        """ Call the `_touch` helper from `_utils` on the current crumb path and
        return its result.
        NOTE(review): presumably it creates the leaf directory and all the
        intermediate ones using the non crumbed part of the path and returns
        the path created -- confirm against `_utils._touch`.

        Returns
        -------
        nupath: str
        """
        return self._touch(self._path)

    def joinpath(self, suffix):
        """ Return a copy of the current crumb with the `suffix` path appended.
        If suffix has crumb arguments, the whole crumb will be updated.
        The new crumb keeps the ignore list of the current one.

        Parameters
        ----------
        suffix: str

        Returns
        -------
        cr: Crumb
        """
        return type(self)(op.join(self._path, suffix), ignore_list=self._ignore)

    def exists(self):
        """ Return True if the current crumb path is a possibly existing path,
        False otherwise.

        Returns
        -------
        exists: bool
        """
        if not self.has_crumbs(self._path):
            return op.exists(str(self)) or op.islink(str(self))

        if not op.exists(self.split()[0]):
            return False

        last, _ = self._lastarg()
        paths = self.ls(last, fullpath=True, make_crumbs=False, check_exists=False)

        return all(self._split_exists(lp) for lp in paths)

    def has_files(self):
        """ Return True if the current crumb path has any file in its
        possible paths.

        Returns
        -------
        has_files: bool
        """
        # without crumb arguments there is nothing to unfold, only one possible path
        if not self.has_crumbs(self._path):
            return op.isfile(self._path)

        if not op.exists(self.split()[0]):
            return False

        last, _ = self._lastarg()
        paths = self.ls(last, fullpath=True, make_crumbs=True, check_exists=True)

        return any(op.isfile(str(lp)) for lp in paths)

    def unfold(self):
        """ Return a list of all the existing paths until the last crumb argument.
        If the path has no crumb arguments the list holds `self` if it exists,
        otherwise it is empty.

        Returns
        -------
        paths: list of Crumb
        """
        last = self._lastarg()
        if last is None:
            return [self] if self.exists() else []

        return self.ls(last[0], fullpath=True, make_crumbs=True, check_exists=True)

    def __getitem__(self, arg_name):
        """ Return the value set for the crumb argument `arg_name` if any,
        otherwise its existing values without removing duplicates.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        values: str or list of str
        """
        if arg_name in self._argval:
            return self._argval[arg_name]

        return self.ls(arg_name, fullpath=False, make_crumbs=False, check_exists=True)

    def __setitem__(self, key, value):
        """ Replace the crumb argument `key` in the path by `value`.

        Raises
        ------
        KeyError: if `key` is not a crumb argument of the path.
        """
        if key not in self._argidx:
            raise KeyError("Expected `arg_name` to be one of ({}),"
                           " got {}.".format(list(self._argidx), key))
        self.setitems(**{key: value})

    # The ordering is the one of the path strings.
    def __ge__(self, other):
        return self._path >= str(other)

    def __le__(self, other):
        return self._path <= str(other)

    def __gt__(self, other):
        return self._path > str(other)

    def __lt__(self, other):
        return self._path < str(other)

    def __hash__(self):
        return hash(self._path)

    def __contains__(self, item):
        """ Return True if `item` is the name of a crumb argument in the path."""
        return item in self._argidx

    def __repr__(self):
        # `type(self)` reports the right name for subclasses as well
        return '{}("{}")'.format(type(self).__name__, self._path)

    def __str__(self):
        return str(self._path)

    def __eq__(self, other):
        """ Return True if `self` and `other` are equal, False otherwise.
        Two crumbs are equal if they have the same path, arguments,
        argument values and ignore list.

        Parameters
        ----------
        other: Crumb

        Returns
        -------
        is_equal: bool
            `NotImplemented` if `other` is not a Crumb, so Python can fall back
            to its default comparison instead of raising an AttributeError.
        """
        if not isinstance(other, Crumb):
            return NotImplemented

        return (self._path == other._path and
                self._argidx == other._argidx and
                self._argval == other._argval and
                self._ignore == other._ignore)
627
628