Completed
Push — master ( d09250...b67a9c )
by Alexandre M.
9s
created

Crumb.ls()   F

Complexity

Conditions 12

Size

Total Lines 77

Duplication

Lines 0
Ratio 0 %

Importance

Changes 9
Bugs 3 Features 0
Metric Value
cc 12
c 9
b 3
f 0
dl 0
loc 77
rs 2.1279

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Complexity

Complex classes like Crumb.ls() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Crumb class: the smart path model class.
6
"""
7
import os
8
import pathlib
9
import re
10
from collections import OrderedDict
11
from copy import deepcopy
12
from typing import List, Dict, Iterator, Tuple
13
14
from hansel._utils import (
15
    _first_txt,
16
    _build_path,
17
    _arg_names,
18
    _find_arg_depth,
19
    _check,
20
    _depth_names,
21
    _depth_names_regexes,
22
    _has_arg,
23
    _is_crumb_arg,
24
    _split_exists,
25
    _split,
26
    _touch,
27
    has_crumbs,
28
    is_valid,
29
)
30
from hansel.utils import (
31
    list_subpaths,
32
    fnmatch_filter,
33
    regex_match_filter,
34
    CrumbArgsSequence, CrumbArgsSequences)
35
36
37
class Crumb(object):
38
    """ The crumb path model class.
39
    Parameters
40
    ----------
41
    crumb_path: str
42
        A file or folder path with crumb arguments. See Examples.
43
44
    ignore_list: sequence of str
45
        A list of `fnmatch` patterns of filenames to be ignored.
46
47
    regex: str
48
        Choices: 'fnmatch', 're' or 're.ignorecase'
49
        If 'fnmatch' will use fnmatch regular expressions to
50
        match any expression you may have in a crumb argument.
51
        If 're' will use re.match.
52
        If 're.ignorecase' will use re.match and pass re.IGNORECASE to re.compile.
53
54
    Examples
55
    --------
56
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
57
    >>> cr = Crumb(os.path.join(os.path.expanduser('~'), '{user_folder}'))
58
    """
59
60
    def __init__(self, crumb_path: str, ignore_list: List[str] = None, regex: str = 'fnmatch'):
        """ Create a Crumb for `crumb_path`.

        Parameters
        ----------
        crumb_path: str
            A file or folder path with crumb arguments. It is passed through
            `_check` and the returned value is stored as the internal path.

        ignore_list: sequence of str
            Patterns of file names to be ignored when listing paths.
            None (the default) is replaced by a new empty list.

        regex: str
            Name of the matching method: 'fnmatch', 're' or 're.ignorecase'.
            Any other value makes `_set_match_function` raise a ValueError.
        """
        self._path = _check(crumb_path)
        # Values already assigned to crumb arguments, keyed by argument name.
        self._argval = {}
        self._re_method = regex
        self._re_args = None
        # A fresh list per instance, so no mutable default is shared.
        self._ignore = [] if ignore_list is None else ignore_list
        self._update()
71
72
    def _update(self):
73
        """ Clean up, parse the current crumb path and fill the internal
74
        members for functioning."""
75
        self._set_match_function()
76
77
    def _set_match_function(self):
78
        """ Update self._match_filter with a regular expression
79
        matching function depending on the value of self._re_method."""
80
        if self._re_method == 'fnmatch':
81
            self._match_filter = fnmatch_filter
82
        elif self._re_method == 're':
83
            self._match_filter = regex_match_filter
84
        elif self._re_method == 're.ignorecase':
85
            self._match_filter = regex_match_filter
86
            self._re_args = (re.IGNORECASE,)
87
        else:
88
            raise ValueError('Expected regex method value to be "fnmatch", "re" or "re.ignorecase"'
89
                             ', got {}.'.format(self._re_method))
90
91
    def is_valid(self, crumb_path: str = None) -> bool:
        """ Tell whether `crumb_path` is a valid crumb path.

        When `crumb_path` is None, `self.path` is checked instead. The decision
        itself is delegated to the module-level `is_valid` function.
        """
        target = self.path if crumb_path is None else crumb_path
        return is_valid(target)
99
100
    @property
    def patterns(self):
        """ Return a dict mapping argument names to their patterns.

        Only the arguments of the internal path for which
        `_depth_names_regexes` reports a non-empty pattern are included.
        """
        found = {}
        for _, (name, pattern) in _depth_names_regexes(self._path):
            if pattern:
                found[name] = pattern
        return found
104
105
    def set_pattern(self, arg_name: str, arg_regex: str):
        """ Attach the pattern `arg_regex` to the argument `arg_name`.

        The internal path is rebuilt with the new pattern in place.

        Raises
        ------
        KeyError
            If `arg_name` is not an argument of the current `self.path`.
        """
        if _has_arg(self.path, arg_name):
            new_regexes = {arg_name: arg_regex}
            self._path = _build_path(self._path, arg_values={}, with_regex=True, regexes=new_regexes)
            return

        raise KeyError('Crumb argument {} is not present in {}.'.format(arg_name, self))
116
117
    def set_patterns(self, **kwargs):
118
        """ Set the pattern to the given arguments as keywords. """
119
        for arg, pat in kwargs.items():
120
            self.set_pattern(arg, pat)
121
122
    def clear_pattern(self, arg_name: str):
123
        """ Clear the pattern of the given argument `arg_name`."""
124
        self.set_pattern(arg_name, '')
125
126
    def clear(self, arg_name: str):
127
        """ Clear the value of the given argument `arg_name`."""
128
        del self._argval[arg_name]
129
130
    @property
131
    def arg_values(self) -> Dict[str, str]:
132
        """ Return a dict with the arg_names and values of the already replaced crumb arguments."""
133
        return self._argval
134
135
    @property
    def path(self) -> str:
        """ Return the current crumb path string.

        It is built from the internal path and the stored argument values,
        asking `_build_path` to keep the patterns (`with_regex=True`).
        """
        template = self._path
        values = self.arg_values
        return _build_path(template, arg_values=values, with_regex=True)
139
140
    @path.setter
141
    def path(self, value: str):
142
        """ Set the current crumb path string and updates the internal members.
143
        Parameters
144
        ----------
145
        value: str
146
            A file or folder path with crumb arguments. See Examples in class docstring.
147
        """
148
        self._path = value
149
        self._update()
150
151
    def has_crumbs(self, crumb_path: str = None) -> bool:
        """ Tell whether `crumb_path` still has open crumb arguments.

        When `crumb_path` is None, `self.path` is tested instead. The decision
        itself is delegated to the module-level `has_crumbs` function.
        """
        target = self.path if crumb_path is None else crumb_path
        return has_crumbs(target)
158
159
    def _open_arg_items(self):
        """ Iterate over the crumb arguments still present in `self.path`.

        Returns
        -------
        depth_args: generator of 2-tuple of int and str
            Each item is the pair produced by `_depth_names` for the current
            path: the depth index of the argument and its name, in the order
            `_depth_names` yields them.
        """
        yield from ((depth, name) for depth, name in _depth_names(self.path))
176
177
    def _last_open_arg(self):
178
        """ Return the idx and name of the last (right-most) open argument."""
179
        open_args = list(self._open_arg_items())
180
        if not open_args:
181
            return None, None
182
183
        for dpth, arg in reversed(open_args):
184
            return dpth, arg
185
186
    def _first_open_arg(self):
187
        """ Return the idx and name of the first (left-most) open argument."""
188
        for dpth, arg in self._open_arg_items():
189
            return dpth, arg
190
191
    def _is_first_open_arg(self, arg_name: str) -> bool:
192
        """ Return True if `arg_name` is the first open argument."""
193
        # Take into account that self._argidx is OrderedDict
194
        return arg_name == self._first_open_arg()[1]
195
196
    def has_set(self, arg_name: str) -> bool:
197
        """ Return True if the argument `arg_name` has been set to a
198
        specific value, False if it is still a crumb argument."""
199
        return arg_name not in set(self.open_args())
200
201
    def open_args(self) -> Iterator[str]:
202
        """ Return an iterator to the crumb argument names in `self`
203
        that have not been replaced yet.
204
        In the same order as they appear in the crumb path."""
205
        for _, arg_name in self._open_arg_items():
206
            yield arg_name
207
208
    def all_args(self) -> Iterator[str]:
        """ Iterate over every crumb argument name of the internal path.

        The names are whatever `_arg_names` reports for `self._path`, i.e. the
        path before any stored argument value is applied.

        Returns
        -------
        crumb_args: iterator of str
        """
        for name in _arg_names(self._path):
            yield name
217
218
    def copy(self, crumb: 'Crumb' = None) -> 'Crumb':
219
        """ Return a deep copy of the given `crumb`.
220
        If `crumb` is None will return a copy of self.
221
222
        Parameters
223
        ----------
224
        crumb: str or Crumb
225
226
        Returns
227
        -------
228
        copy: Crumb
229
        """
230
        if crumb is None:
231
            crumb = self
232
233
        if isinstance(crumb, Crumb):
234
            nucr = Crumb(
235
                crumb._path,
236
                ignore_list=crumb._ignore,
237
                regex=crumb._re_method
238
            )
239
            nucr._argval = deepcopy(crumb._argval)
240
            return nucr
241
242
        if isinstance(crumb, str):
243
            return Crumb.from_path(crumb)
244
245
        raise TypeError("Expected a Crumb or a str to copy, "
246
                        "got {}.".format(type(crumb)))
247
248
    def isabs(self) -> bool:
        """ Tell whether the current crumb path is absolute.

        The test is `os.path.isabs` applied to what `_first_txt` extracts
        from `self.path`.
        """
        return os.path.isabs(_first_txt(self.path))
256
257
    def abspath(self, first_is_basedir: bool = False) -> 'Crumb':
258
        """ Return a copy of `self` with an absolute crumb path.
259
        Add as prefix the absolute path to the current directory if
260
        the current crumb is not absolute.
261
        Parameters
262
        ----------
263
        first_is_basedir: bool
264
            If True and the current crumb path starts with a crumb argument and first_is_basedir,
265
            the first argument will be replaced by the absolute path to the current dir,
266
            otherwise the absolute path to the current dir will be added as a prefix.
267
268
        Returns
269
        -------
270
        abs_crumb: Crumb
271
        """
272
        nucr = self.copy()
273
274
        if not nucr.isabs():
275
            nucr._path = self._abspath(first_is_basedir=first_is_basedir)
276
277
        return nucr
278
279
    def _abspath(self, first_is_basedir: bool = False) -> str:
280
        """ Return the absolute path of the current crumb path.
281
        Parameters
282
        ----------
283
        first_is_basedir: bool
284
            If True and the current crumb path starts with a crumb argument and first_is_basedir,
285
            the first argument will be replaced by the absolute path to the current dir,
286
            otherwise the absolute path to the current dir will be added as a prefix.
287
288
        Returns
289
        -------
290
        abspath: str
291
        """
292
        if os.path.isabs(self._path):
293
            return self._path
294
295
        splits = self._path.split(os.path.sep)
296
        basedir = [os.path.abspath(os.path.curdir)]
297
298
        if _is_crumb_arg(splits[0]):
299
            if first_is_basedir:
300
                splits.pop(0)
301
302
        basedir.extend(splits)
303
        return os.path.sep.join(basedir)
304
305
    def split(self) -> Tuple[str, str]:
        """ Split the current crumb path in two parts.

        The work is delegated to `_split(self.path)`. The first part is
        expected to be the base folder without crumb arguments and the second
        the rest of the path, starting at the first crumb argument.
        """
        current = self.path
        return _split(current)
312
313
    @classmethod
314
    def from_path(cls, crumb_path: [str, 'Crumb', pathlib.Path]) -> 'Crumb':
315
        """ Create an instance of Crumb out of `crumb_path`.
316
        Parameters
317
        ----------
318
        val: str or Crumb or pathlib.Path
319
320
        Returns
321
        -------
322
        path: Crumb
323
        """
324
        if isinstance(crumb_path, Crumb):
325
            return crumb_path.copy()
326
        elif isinstance(crumb_path, pathlib.Path):
327
            return cls(str(crumb_path))
328
        elif isinstance(crumb_path, str):
329
            return cls(crumb_path)
330
        else:
331
            raise TypeError("Expected a `val` to be a `str`, got {}.".format(type(crumb_path)))
332
333
    def _arg_values(self, arg_name: str, arg_values: CrumbArgsSequence = None) -> CrumbArgsSequences:
334
        """ Return the existing values in the file system for the crumb argument
335
        with name `arg_name`.
336
        The `arg_values` must be a sequence with the tuples with valid values of the dependent
337
        (previous in the path) crumb arguments.
338
        The format of `arg_values` work in such a way that `self._path.format(dict(arg_values[0]))`
339
        would give me a valid path or crumb.
340
        Parameters
341
        ----------
342
        arg_name: str
343
344
        arg_values: list of tuples
345
346
        Returns
347
        -------
348
        vals: list of tuples
349
350
        Raises
351
        ------
352
        ValueError: if `arg_values` is None and `arg_name` is not the
353
        first crumb argument in self._path
354
355
        AttributeError: if the path is not absolute
356
357
        IOError: if this crosses to any path that is non-existing.
358
        """
359
        # if arg_name is not None and arg_values is None:
360
        #     if arg_name in self.arg_values:
361
        #         return [[(arg_name, self.arg_values[arg_name])]]
362
363
        if arg_values is None and not self._is_first_open_arg(arg_name):
364
            raise ValueError("Cannot get the list of values for {} if"
365
                             " the previous arguments are not filled"
366
                             " in `paths`.".format(arg_name))
367
368
        path = self.path
369
        dpth, arg_name, arg_regex = _find_arg_depth(path, arg_name)
370
        splt = path.split(os.path.sep)
371
372
        if dpth == len(splt) - 1:  # this means we have to list files too
373
            just_dirs = False
374
        else:  # this means we have to list folders
375
            just_dirs = True
376
377
        if arg_values is None:
378
            vals = self._arg_values_from_base(
379
                basedir=os.path.sep.join(splt[:dpth]),
380
                arg_name=arg_name,
381
                arg_regex=arg_regex,
382
                just_dirs=just_dirs
383
            )
384
        else:
385
            vals = self._extend_arg_values(
386
                arg_values=arg_values,
387
                arg_name=arg_name,
388
                arg_regex=arg_regex,
389
                just_dirs=just_dirs
390
            )
391
392
        return vals
393
394
    def _extend_arg_values(
395
        self,
396
        arg_values: CrumbArgsSequence,
397
        arg_name: str,
398
        arg_regex: str,
399
        just_dirs: bool
400
    ) -> CrumbArgsSequences:
401
        """ Return an extended copy of `arg_values` with valid values for `arg_name`."""
402
        path = self.path
403
        vals = []
404
        for aval in arg_values:
405
            #  create the part of the crumb path that is already specified
406
            nupath = _split(_build_path(path, arg_values=dict(aval)))[0]
407
408
            # THIS HAPPENS, LEAVE IT. TODO: make a test for this line
409
            if not os.path.exists(nupath):
410
                continue
411
412
            paths = list_subpaths(
413
                nupath,
414
                just_dirs=just_dirs,
415
                ignore=self._ignore,
416
                pattern=arg_regex,
417
                filter_func=self._match_filter
418
            )
419
420
            #  extend `val` tuples with the new list of values for `aval`
421
            vals.extend([aval + [(arg_name, sp)] for sp in paths])
422
423
        return vals
424
425
    def _arg_values_from_base(self, basedir: str, arg_name: str, arg_regex: str, just_dirs: bool) -> CrumbArgsSequences:
        """ Return the values for `arg_name` listed from `basedir`.

        `list_subpaths` is called on `basedir` with the ignore list, the
        pattern `arg_regex` and the current matching function and arguments.
        Every value found becomes a single-item entry `[(arg_name, value)]`.
        """
        found = list_subpaths(
            basedir,
            just_dirs=just_dirs,
            ignore=self._ignore,
            pattern=arg_regex,
            filter_func=self._match_filter,
            filter_args=self._re_args
        )

        entries = []
        for value in found:
            entries.append([(arg_name, value)])
        return entries
435
436
    def _check_args(self, arg_names: Iterator[str], self_args: Iterator[str]):
437
        """ Raise a ValueError if `self_args` is empty.
438
            Raise a KeyError if `arg_names` is not a subset of `self_args`.
439
        """
440
        anames = set(arg_names)
441
        aself = set(self_args)
442
        if not anames and not aself:
443
            return
444
445
        if not aself or aself is None:
446
            raise AttributeError('This Crumb has no remaining arguments: {}.'.format(self.path))
447
448
        if not anames.issubset(aself):
449
            raise KeyError("Expected `arg_names` to be a subset of ({}),"
450
                           " got {}.".format(list(aself), anames))
451
452
    def _check_open_args(self, arg_names: Iterator[str]):
453
        """ Raise a KeyError if any of the arguments in `arg_names` is not a crumb
454
        argument name in `self.path`.
455
        Parameters
456
        ----------
457
        arg_names: sequence of str
458
            Names of crumb arguments
459
460
        Raises
461
        ------
462
        KeyError
463
        """
464
        self._check_args(arg_names, self_args=self.open_args())
465
466
    def update(self, **kwargs) -> 'Crumb':
        """ Set the crumb arguments named in `kwargs` to the given values, in place.

        Parameters
        ----------
        kwargs: strings
            Argument names and the string values to store for them.

        Returns
        -------
        crumb: Crumb
            `self`, after the values have been stored.

        Raises
        ------
        ValueError
            If any of the given values is not a str.
        """
        self._check_args(list(kwargs), self_args=self.all_args())

        for name in kwargs:
            value = kwargs[name]
            if isinstance(value, str):
                continue
            raise ValueError("Expected a string for the value of argument {}, "
                             "got {}.".format(name, value))

        # `_check` is run on the resulting path before any value is stored.
        _check(_build_path(self.path, arg_values=kwargs, with_regex=True))

        self._argval.update(kwargs)
        return self
489
490
    def replace(self, **kwargs) -> 'Crumb':
        """ Return a copy of `self` with the arguments in `kwargs` set to their values.

        `self` is left untouched: the values are stored in the copy through
        `update`. This plays the role `str.format` has for strings.

        Parameters
        ----------
        kwargs: strings

        Returns
        -------
        crumb: Crumb
        """
        return self.copy(self).update(**kwargs)
504
505
    def _arg_parents(self, arg_name: str) -> Dict[str, int]:
        """ Return the open arguments located at or before `arg_name` in the path.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        arg_deps: OrderedDict
            Maps the name of each open argument whose depth index is not
            greater than the depth of `arg_name` to that depth index.
        """
        # An argument that already has a value is looked up in the internal
        # path instead of the built one.
        lookup_path = self._path if arg_name in self.arg_values else self.path
        depth, _name, _regex = _find_arg_depth(lookup_path, arg_name)

        parents = OrderedDict()
        for idx, name in self._open_arg_items():
            if idx <= depth:
                parents[name] = idx
        return parents
523
524
    def _args_open_parents(self, arg_names: Iterator[str]) -> Iterator[str]:
525
        """ Return the name of the arguments that are dependencies of `arg_names`.
526
        Parameters
527
        ----------
528
        arg_names:
529
530
        Returns
531
        -------
532
        rem_deps:
533
        """
534
        started = False
535
        arg_dads = []
536
        for an in reversed(list(self.open_args())):  # take into account that argidx is ordered
537
            if an in arg_names:
538
                started = True
539
            else:
540
                if started:
541
                    arg_dads.append(an)
542
543
        return list(reversed(arg_dads))
544
545
    def values_map(self, arg_name: str = '', check_exists: bool = False) -> CrumbArgsSequences:
546
        """ Return a list of tuples of crumb arguments with their values from the
547
        first argument until `arg_name`.
548
        Parameters
549
        ----------
550
        arg_name: str
551
            If empty will pick the arg_name of the last open argument of the Crumb.
552
553
        check_exists: bool
554
555
        Returns
556
        -------
557
        values_map: list of lists of 2-tuples
558
            I call values_map what is called `record` in pandas.
559
            It is a list of lists of 2-tuples, where each 2-tuple
560
            has the shape (arg_name, arg_value).
561
        """
562
        if not arg_name:
563
            _, arg_name = self._last_open_arg()
564
565
        if arg_name is None:
566
            return [list(self.arg_values.items())]
567
568
        arg_deps = self._arg_parents(arg_name)
569
570
        values_map = None
571
        if arg_deps:
572
            for arg in arg_deps:
573
                values_map = self._arg_values(arg, values_map)
574
        elif arg_name in self.arg_values:
575
            values_map = [[(arg_name, self.arg_values[arg_name])]]
576
        else:  # this probably will never be reached.
577
            raise ValueError('Could not build a map of values with '
578
                             'argument {}.'.format(arg_name))
579
580
        return sorted(self._build_and_check(values_map) if check_exists else values_map)
581
582
    def _build_and_check(self, values_map: CrumbArgsSequences) -> CrumbArgsSequences:
583
        """ Return a values_map of arg_values that lead to existing crumb paths."""
584
        paths = list(self.build_paths(values_map, make_crumbs=True))
585
        yield from (args for args, path in zip(values_map, paths) if path.exists())
586
587
    def build_paths(
588
        self,
589
        values_map: CrumbArgsSequences,
590
        make_crumbs: bool = True
591
    ) -> [Iterator[str], Iterator['Crumb']]:
592
        """ Return a list of paths from each tuple of args from `values_map`
593
        Parameters
594
        ----------
595
        values_map: list of sequences of 2-tuple
596
            Example: [[('subject_id', 'haensel'), ('candy', 'lollipos.path.png')],
597
                      [('subject_id', 'gretel'),  ('candy', 'jujube.png')],
598
                     ]
599
600
        make_crumbs: bool
601
            If `make_crumbs` is True will create a Crumb for
602
            each element of the result.
603
            Default: True.
604
605
        Returns
606
        -------
607
        paths: list of str or list of Crumb
608
        """
609
        if make_crumbs:
610
            yield from (self.replace(**dict(val)) for val in values_map)
611
        else:
612
            yield from (_build_path(self.path, arg_values=dict(val)) for val in values_map)
613
614
    def ls(
615
        self,
616
        arg_name: str = '',
617
        fullpath: bool = True,
618
        make_crumbs: bool = True,
619
        check_exists: bool = True
620
    ) -> [Iterator[str], Iterator['Crumb']]:
621
        """ Return the list of values for the argument crumb `arg_name`.
622
        This will also unfold any other argument crumb that appears before in the
623
        path.
624
        Parameters
625
        ----------
626
        arg_name: str
627
            Name of the argument crumb to be unfolded.
628
            If empty will pick the arg_name of the last open argument of the Crumb.
629
            `arg_name` can also contain file patterns in the same syntax as
630
            the `regex` argument type used in the `__init__` of the object.
631
632
        fullpath: bool
633
            If True will build the full path of the crumb path, will also append
634
            the rest of crumbs not unfolded.
635
            If False will only return the values for the argument with name
636
            `arg_name`.
637
638
        make_crumbs: bool
639
            If `fullpath` and `make_crumbs` is True will create a Crumb for
640
            each element of the result.
641
642
        check_exists: bool
643
            If True will return only str, Crumb or Path if it exists
644
            in the file path, otherwise it may create file paths
645
            that don't have to exist.
646
647
        Returns
648
        -------
649
        values
650
651
        Examples
652
        --------
653
        >>> cr = Crumb(os.path.join(os.path.expanduser('~'), '{user_folder}'))
654
        >>> user_folders = cr.ls('user_folder',fullpath=True,make_crumbs=True)
655
        """
656
        if not arg_name and not fullpath:
657
            raise ValueError('Expecting an `arg_name` if `fullpath` is False.')
658
659
        if not arg_name:
660
            _, arg_name = self._last_open_arg()
661
662
        if arg_name is None:
663
            arg_name = ''
664
665
        # check if there is any regex in the arg_name, if True, set the pattern
666
        # later check if the arg_name is correct
667
        arg_regex = False
668
        if arg_name:
669
            _, (arg_name, arg_regex) = tuple(_depth_names_regexes('{' + arg_name + '}'))[0]
670
            if arg_regex:
671
                old_regex = self.patterns.get(arg_name, None)
672
                self.set_pattern(arg_name=arg_name, arg_regex=arg_regex)
673
674
            self._check_args([arg_name], self.all_args())
675
676
        # build the paths or value maps
677
        self._check_ls_params(make_crumbs, fullpath)
678
679
        # make_crumbs only makes sense if fullpath is True
680
        if not fullpath:
681
            make_crumbs = fullpath
682
683
        # create the grid of values for the arguments
684
        values_map = self.values_map(arg_name, check_exists=check_exists)
685
        if fullpath:
686
            paths = self.build_paths(values_map, make_crumbs=make_crumbs)
687
        else:
688
            paths = (dict(val)[arg_name] for val in values_map)
689
690
        # clear and set the old the pattern if it was set for this query
691
        if arg_regex:
692
            self.clear_pattern(arg_name=arg_name)
693
            if old_regex is not None:
694
                self.set_pattern(arg_name=arg_name, arg_regex=old_regex)
695
696
        return sorted(paths)
697
698
    def _check_ls_params(self, make_crumbs: bool, fullpath: bool):
699
        """ Raise errors if the arguments are not good for ls function."""
700
        # if the first chunk of the path is a parameter, I am not interested in this (for now)
701
        # check if the path is absolute, if not raise an NotImplementedError
702
        if not self.isabs() and self.path.startswith('{'):
703
            raise NotImplementedError("Cannot list paths that start with an argument. "
704
                                      "If this is a relative path, use the `abspath()` "
705
                                      "member function.")
706
707
    def touch(self, exist_ok: bool = True) -> str:
        """ Create the folders of the current crumb path through `_touch`.

        Presumably `_touch` creates the leaf directory and the intermediate
        ones for the part of the path without crumb arguments, and raises an
        IOError when the target exists and `exist_ok` is False; confirm
        against `_touch`.

        Parameters
        ----------
        exist_ok: bool
            Default = True

        Returns
        -------
        nupath: str
            The value returned by `_touch`.
        """
        current = self.path
        return _touch(current, exist_ok=exist_ok)
725
726
    def joinpath(self, suffix: str) -> 'Crumb':
        """ Return a new Crumb for the current path with `suffix` appended.

        The new Crumb is built only from the joined path string, so it uses
        the default ignore list and regex method.

        Parameters
        ----------
        suffix: str

        Returns
        -------
        cr: Crumb
        """
        joined = os.path.join(self.path, suffix)
        return Crumb(joined)
738
739
    def exists(self) -> bool:
        """ Tell whether the current crumb path can lead to an existing path.

        Without open arguments the path itself must exist or be a link.
        Otherwise the base folder must exist and at least one of the paths
        listed for the last open argument must pass `_split_exists`.

        Returns
        -------
        exists: bool
        """
        if has_crumbs(self.path):
            if not os.path.exists(self.split()[0]):
                return False

            last_name = self._last_open_arg()[1]
            candidates = self.ls(last_name,
                                 fullpath=True,
                                 make_crumbs=False,
                                 check_exists=False)
            return any(_split_exists(candidate) for candidate in candidates)

        return os.path.exists(str(self)) or os.path.islink(str(self))
759
760
    def has_files(self) -> bool:
        """ Tell whether any of the paths of the current crumb is a file.

        False is returned right away when the base folder does not exist.
        Otherwise the existing paths for the last open argument are listed
        and each one is tested with `os.path.isfile`.

        Returns
        -------
        has_files: bool
        """
        base_dir = self.split()[0]
        if not os.path.exists(base_dir):
            return False

        last_name = self._last_open_arg()[1]
        listed = self.ls(last_name, fullpath=True, make_crumbs=True, check_exists=True)

        return any(os.path.isfile(str(item)) for item in listed)
779
780
    def unfold(self) -> [List['Crumb'], Iterator[pathlib.Path]]:
781
        """ Return a list of all the existing paths until the last crumb argument.
782
        If there are no remaining open arguments,
783
        Returns
784
        -------
785
        paths: list of pathlib.Path
786
        """
787
        if list(self.open_args()):
788
            return self.ls(
789
                self._last_open_arg()[1],
790
                fullpath=True,
791
                make_crumbs=True,
792
                check_exists=True
793
            )
794
795
        return [self]
796
797
    def get_first(self, arg_name: str) -> str:
798
        """ Return the first existing value of the crumb argument `arg_name`.
799
        Parameters
800
        ----------
801
        arg_name: str
802
803
        Returns
804
        -------
805
        values: str
806
        """
807
        return self[arg_name][0]
808
809
    def __getitem__(self, arg_name):
810
        """ Return the existing values of the crumb argument `arg_name`
811
        without removing duplicates.
812
        Parameters
813
        ----------
814
        arg_name: str
815
816
        Returns
817
        -------
818
        values: list of str
819
        """
820
        if arg_name in self._argval:
821
            return [self._argval[arg_name]]
822
        else:
823
            return self.ls(arg_name,
824
                           fullpath=False,
825
                           make_crumbs=False,
826
                           check_exists=True)
827
828
    def __setitem__(self, key: str, value: str):
829
        self.update(**{key: value})
830
831
    def __ge__(self, other: 'Crumb') -> bool:
832
        return self._path >= str(other)
833
834
    def __le__(self, other: 'Crumb') -> bool:
835
        return self._path <= str(other)
836
837
    def __gt__(self, other: 'Crumb') -> bool:
838
        return self._path > str(other)
839
840
    def __lt__(self, other: 'Crumb') -> bool:
841
        return self._path < str(other)
842
843
    def __hash__(self) -> int:
844
        return self._path.__hash__()
845
846
    def __contains__(self, arg_name) -> bool:
847
        return arg_name in self.all_args()
848
849
    def __repr__(self) -> str:
850
        return '{}("{}")'.format(type(self).__name__, self.path)
851
852
    def __str__(self) -> str:
853
        return self.path
854
855
    def __eq__(self, other: 'Crumb') -> bool:
856
        """ Return True if `self` and `other` are equal, False otherwise.
857
        Parameters
858
        ----------
859
        other: Crumb
860
861
        Returns
862
        -------
863
        is_equal: bool
864
        """
865
        if self._path != other._path:
866
            return False
867
868
        if self._argval != other._argval:
869
            return False
870
871
        if self._ignore != other._ignore:
872
            return False
873
874
        return True
875