Completed
Push — master ( 21dee9...eddb34 )
by Alexandre M.
01:06
created

hansel.Crumb.clear_pattern()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 3
rs 10
1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Crumb class: the smart path model class.
6
"""
7
import re
8
import os.path     as op
9
from   copy        import deepcopy
10
from   collections import OrderedDict, Mapping, Sequence
11
from   functools   import partial
12
from   six         import string_types
13
try:
14
    from pathlib2 import Path
15
except:
16
    from pathlib  import Path
17
18
19
from   .utils  import (
20
                       list_subpaths,
21
                       fnmatch_filter,
22
                       regex_match_filter,
23
                       )
24
25
#from hansel._utils import deprecated
26
from   ._utils import (
27
                       _first_txt,
28
                       _build_path,
29
                       _arg_names,
30
                       _find_arg_depth,
31
                       _check,
32
                       _has_arg,
33
                       _depth_names,
34
                       _depth_names_regexes,
35
                       _is_crumb_arg,
36
                       _split_exists,
37
                       _split,
38
                       _touch,
39
                       has_crumbs,
40
                       is_valid,
41
                       )
42
43
44
class Crumb(object):
45
    """ The crumb path model class.
46
    Parameters
47
    ----------
48
    crumb_path: str
49
        A file or folder path with crumb arguments. See Examples.
50
51
    ignore_list: sequence of str
52
        A list of `fnmatch` patterns of filenames to be ignored.
53
54
    regex: str
55
        Choices: 'fnmatch', 're' or 're.ignorecase'
56
        If 'fnmatch' will use fnmatch regular expressions to
57
        match any expression you may have in a crumb argument.
58
        If 're' will use re.match.
59
        If 're.ignorecase' will use re.match and pass re.IGNORECASE to re.compile.
60
61
    Examples
62
    --------
63
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
64
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
65
    """
66
    def __init__(self, crumb_path, ignore_list=None, regex='fnmatch'):
        """ Store the checked crumb path and the matching configuration.

        Parameters
        ----------
        crumb_path: str
            Path with crumb arguments; it goes through `_check` before being stored.

        ignore_list: sequence of str or None
            Patterns to be ignored when listing. None is replaced by a new empty list.

        regex: str
            One of 'fnmatch', 're' or 're.ignorecase'. Any other value makes
            `_set_match_function` raise a ValueError.
        """
        self._path = _check(crumb_path)
        self._argval = {}  # values already assigned to crumb arguments, keyed by argument name
        self._re_method = regex
        self._re_args = None  # extra arguments for the match filter, filled by _set_match_function
        self._ignore = [] if ignore_list is None else ignore_list
        self._update()
77
78
    def _update(self):
        """ Refresh the internal members that depend on the crumb settings.

        At the moment this only re-selects the matching function by calling
        `_set_match_function`.
        """
        self._set_match_function()
82
83
    def _set_match_function(self):
84
        """ Update self._match_filter with a regular expression
85
        matching function depending on the value of self._re_method."""
86
        if self._re_method == 'fnmatch':
87
            self._match_filter = fnmatch_filter
88
        elif self._re_method == 're':
89
            self._match_filter = regex_match_filter
90
        elif self._re_method == 're.ignorecase':
91
            self._match_filter = regex_match_filter
92
            self._re_args      = (re.IGNORECASE, )
93
        else:
94
            raise ValueError('Expected regex method value to be "fnmatch", "re" or "re.ignorecase"'
95
                             ', got {}.'.format(self._re_method))
96
97
    def is_valid(self, crumb_path=None):
        """ Tell whether `crumb_path` is a valid crumb path.

        Parameters
        ----------
        crumb_path: str or None
            Path to be tested. When None, `self.path` is tested instead.

        Returns
        -------
        valid: the result of the module-level `is_valid` helper.
        """
        target = self.path if crumb_path is None else crumb_path
        return is_valid(target)
105
106
    @property
    def patterns(self):
        """ Return a dict that maps each argument name found in `self.path`
        to its regular expression."""
        found = {}
        for _, (name, regex) in _depth_names_regexes(self.path):
            found[name] = regex
        return found
110
111
    def set_pattern(self, arg_name, arg_regex):
        """ Rebuild the stored crumb path so that the argument `arg_name`
        carries the pattern `arg_regex`."""
        new_regexes = {arg_name: arg_regex}
        self._path = _build_path(self._path,
                                 arg_values=self.arg_values,
                                 with_regex=True,
                                 regexes=new_regexes)
114
115
    def clear_pattern(self, arg_name):
        """ Clear the pattern of the given argument `arg_name`.

        This is done by setting its pattern to an empty string through `set_pattern`.
        """
        self.set_pattern(arg_name, '')
118
119
    @property
    def arg_values(self):
        """ Return the dict with the names and values of the already replaced crumb arguments.

        The internal dict itself is returned, not a copy.
        """
        return self._argval
123
124
    @property
    def path(self):
        """ Return the current crumb path string.

        It is rebuilt on every access from the stored path and the values in
        `self.arg_values`, with `with_regex=True`.
        """
        return _build_path(self._path, arg_values=self.arg_values, with_regex=True)
128
129
    @path.setter
    def path(self, value):
        """ Set the current crumb path string and update the internal members.

        Parameters
        ----------
        value: str
            A file or folder path with crumb arguments. See Examples in class docstring.

        Note
        ----
        The value is stored as given: unlike `__init__`, no `_check` call is made here,
        and the already stored argument values are left untouched.
        """
        self._path = value
        self._update()
139
140
    def has_crumbs(self, crumb_path=None):
        """ Tell whether a path still has open crumb arguments.

        Parameters
        ----------
        crumb_path: str or None
            Path to be tested. When None, `self.path` is tested instead.

        Returns
        -------
        has_crumbs: the result of the module-level `has_crumbs` helper.
        """
        target = self.path if crumb_path is None else crumb_path
        return has_crumbs(target)
147
148
    def _open_arg_items(self):
        """ Generate the crumb arguments of `self.path` that have not been replaced yet.

        The items come in the order in which `_depth_names` produces them.

        Yields
        ------
        item: 2-tuple
            (depth, arg_name) of each open argument.
        """
        for level, name in _depth_names(self.path):
            yield level, name
163
164
    def _last_open_arg(self):
        """ Return the (depth, name) tuple of the last (right-most) open argument,
        or None if there are no open arguments."""
        open_items = list(self._open_arg_items())
        if not open_items:
            return None
        return open_items[-1]
168
169
    def _first_open_arg(self):
        """ Return the (depth, name) tuple of the first (left-most) open argument,
        or None if there are no open arguments."""
        return next(self._open_arg_items(), None)
173
174
    def _is_first_open_arg(self, arg_name):
        """ Return True if `arg_name` is the first open argument, False otherwise.

        When the crumb has no open arguments at all the answer is False.
        """
        first = self._first_open_arg()
        if first is None:
            # no open arguments left: indexing None would raise a TypeError
            return False

        return arg_name == first[1]
178
179
    def has_set(self, arg_name):
        """ Return False if `arg_name` is still one of the open crumb arguments,
        True otherwise."""
        still_open = set(self.open_args())
        return arg_name not in still_open
183
184
    def open_args(self):
        """ Generate the names of the crumb arguments that have not been replaced yet,
        in the same order as `_open_arg_items` produces them."""
        for item in self._open_arg_items():
            yield item[1]
189
190
    def all_args(self):
        """ Return the crumb argument names of `self`, as given by `_arg_names`
        applied to the stored path `self._path` (not to the rendered `self.path`).

        Returns
        -------
        crumb_args: the result of `_arg_names`
            NOTE(review): presumably an iterable of str holding both open and
            replaced argument names -- confirm against `_arg_names`.
        """
        return _arg_names(self._path)
199
200
    def copy(self, crumb=None):
        """ Return a deep copy of the given `crumb`.
        If `crumb` is None will return a copy of self.

        Parameters
        ----------
        crumb: str or Crumb

        Returns
        -------
        copy: Crumb
            A new Crumb that shares no mutable state with the original: both the
            argument values and the ignore list are deep-copied.

        Raises
        ------
        TypeError: if `crumb` is neither a Crumb nor a string.
        """
        if crumb is None:
            crumb = self

        if isinstance(crumb, Crumb):
            # deep-copy the ignore list too, otherwise a change in the copy's
            # list would leak into the original crumb
            nucr = Crumb(crumb._path,
                         ignore_list=deepcopy(crumb._ignore),
                         regex=crumb._re_method)
            nucr._argval = deepcopy(crumb._argval)
            return nucr
        elif isinstance(crumb, string_types):
            return Crumb.from_path(crumb)
        else:
            raise TypeError("Expected a Crumb or a str to copy, got {}.".format(type(crumb)))
223
224
    def isabs(self):
        """ Tell whether the current crumb path is absolute.

        The test is `op.isabs` applied to what `_first_txt` extracts from `self.path`.
        """
        return op.isabs(_first_txt(self.path))
231
232
    def abspath(self, first_is_basedir=False):
        """ Return a copy of `self` whose crumb path is absolute.

        If the copy is already absolute it is returned unchanged, otherwise its
        stored path is replaced by the result of `_abspath`.

        Parameters
        ----------
        first_is_basedir: bool
            Forwarded to `_abspath`: if True and the path starts with a crumb
            argument, that argument is replaced by the absolute path to the
            current dir instead of being prefixed by it.

        Returns
        -------
        abs_crumb: Crumb
        """
        clone = self.copy()
        if clone.isabs():
            return clone

        clone._path = self._abspath(first_is_basedir=first_is_basedir)
        return clone
253
254
    def _abspath(self, first_is_basedir=False):
255
        """ Return the absolute path of the current crumb path.
256
        Parameters
257
        ----------
258
        first_is_basedir: bool
259
            If True and the current crumb path starts with a crumb argument and first_is_basedir,
260
            the first argument will be replaced by the absolute path to the current dir,
261
            otherwise the absolute path to the current dir will be added as a prefix.
262
263
        Returns
264
        -------
265
        abspath: str
266
        """
267
        if op.isabs(self._path):
268
            return self._path
269
270
        splits = self._path.split(op.sep)
271
        basedir = [op.abspath(op.curdir)]
272
273
        if _is_crumb_arg(splits[0]):
274
            if first_is_basedir:
275
                splits.pop(0)
276
277
        basedir.extend(splits)
278
        return op.sep.join(basedir)
279
280
    def split(self):
        """ Return the result of `_split` applied to the current crumb path.

        Returns
        -------
        crumbs: list of str
            NOTE(review): presumably the first element is the leading part of the
            path without crumb arguments (other methods here use `split()[0]`
            as an existing base path) -- confirm against `_split`.
        """
        return _split(self.path)
289
290
    @classmethod
    def from_path(cls, crumb_path):
        """ Create an instance of Crumb out of `crumb_path`.

        Parameters
        ----------
        crumb_path: str or Crumb or pathlib.Path
            A Crumb is copied; a Path is converted to `str` first.

        Returns
        -------
        path: Crumb

        Raises
        ------
        TypeError: if `crumb_path` is none of the accepted types.
        """
        if isinstance(crumb_path, Crumb):
            return crumb_path.copy()

        if isinstance(crumb_path, Path):
            # build the instance directly; `copy` is an instance method and
            # must not be called on the class with a string as `self`
            return cls(str(crumb_path))

        if isinstance(crumb_path, string_types):
            return cls(crumb_path)

        raise TypeError("Expected `crumb_path` to be a `str`, `Crumb` or `pathlib.Path`, "
                        "got {}.".format(type(crumb_path)))
309
310
    def _arg_values(self, arg_name, arg_values=None):
        """ Return the existing values in the file system for the crumb argument
        with name `arg_name`.

        The `arg_values` must be a sequence with the lists of (name, value) tuples
        with valid values of the dependent (previous in the path) crumb arguments.

        Parameters
        ----------
        arg_name: str

        arg_values: list of lists of 2-tuples, or None
            None means that `arg_name` is the first open argument, so the values
            are listed directly from the part of the path that precedes it.

        Returns
        -------
        vals: list of lists of 2-tuples
            Each input record extended with one `(arg_name, value)` tuple per
            value found. Records whose base path does not exist are skipped.

        Raises
        ------
        ValueError: if `arg_values` is None and `arg_name` is not the
        first open crumb argument.

        AttributeError: if the path is not absolute.
        """
        if arg_values is None and not self._is_first_open_arg(arg_name):
            raise ValueError("Cannot get the list of values for {} if"
                             " the previous arguments are not filled"
                             " in `paths`.".format(arg_name))

        # check if the path is absolute, if not raise an AttributeError
        # this shouldn't really happen because this is a private function.
        # This check is going to be here temporarily: TODO
        if not self.isabs():
            raise AttributeError("Expected an absolute crumb path but got {}.".format(self.path))

        path = self.path
        dpth, arg_name, arg_regex = _find_arg_depth(path, arg_name)
        splt = path.split(op.sep)

        # only the last path component lists files too, any other one lists just folders
        just_dirs = dpth != len(splt) - 1

        # the same listing options must be used in both branches; `filter_args`
        # carries the flags of the 're.ignorecase' method
        listing = dict(just_dirs=just_dirs,
                       ignore=self._ignore,
                       pattern=arg_regex,
                       filter_func=self._match_filter,
                       filter_args=self._re_args)

        if arg_values is None:
            base = op.sep.join(splt[:dpth])
            return [[(arg_name, val)] for val in list_subpaths(base, **listing)]

        vals = []
        for aval in arg_values:
            #  create the part of the crumb path that is already specified
            nupath = _split(_build_path(path, arg_values=dict(aval)))[0]

            # THIS HAPPENS, LEAVE IT. TODO: make a test for this line
            if not op.exists(nupath):
                continue

            paths = list_subpaths(nupath, **listing)

            #  extend `val` tuples with the new list of values for `aval`
            vals.extend([aval + [(arg_name, sp)] for sp in paths])

        return vals
386
387
    def _check_args(self, arg_names, self_args):
388
        """ Raise a ValueError if `self_args` is empty.
389
            Raise a KeyError if `arg_names` is not a subset of `self_args`.
390
        """
391
        anames = set(arg_names)
392
        aself  = set(self_args)
393
        if not anames and not aself:
394
            return
395
396
        if not aself:
397
            raise AttributeError('This Crumb has no remaining arguments: {}.'.format(self.path))
398
399
        if not anames.issubset(aself):
400
            raise KeyError("Expected `arg_names` to be a subset of ({}),"
401
                           " got {}.".format(list(aself), anames))
402
403
    def _check_open_args(self, arg_names):
        """ Check `arg_names` against the open arguments of `self` using `_check_args`.

        Parameters
        ----------
        arg_names: sequence of str
            Names of crumb arguments

        Raises
        ------
        KeyError: if `arg_names` is not a subset of the open arguments.

        AttributeError: if there are no open arguments but `arg_names` is not empty.
        """
        return self._check_args(arg_names, self_args=self.open_args())
416
417
    def update(self, **kwargs):
        """ Assign the values in `kwargs` to the crumb arguments of `self`, in place.

        Parameters
        ----------
        kwargs: strings
            Argument names as keys, their new values as values.

        Returns
        -------
        crumb: Crumb
            `self`, so that calls can be chained.

        Raises
        ------
        ValueError: if any value is not a string.
        """
        self._check_args(kwargs.keys(), self_args=self.all_args())

        for name, value in kwargs.items():
            if isinstance(value, string_types):
                continue
            raise ValueError("Expected a string for the value of argument {}, "
                             "got {}.".format(name, value))

        # build and check the candidate path first, so the new values are only
        # stored after `_check` has accepted the result
        _check(_build_path(self.path, arg_values=kwargs, with_regex=True))

        self._argval.update(**kwargs)
        return self
440
441
    def replace(self, **kwargs):
        """ Return a copy of `self` updated with the argument values in `kwargs`.

        `self` is left untouched; the copy goes through `update`.

        Parameters
        ----------
        kwargs: strings

        Returns
        -------
        crumb: Crumb
        """
        return self.copy(self).update(**kwargs)
455
456
    def _arg_parents(self, arg_name):
        """ Collect the open arguments whose depth is not greater than the depth
        of `arg_name`, `arg_name` included.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        arg_deps: OrderedDict[str, int]
            Argument name -> depth, in the order given by `_open_arg_items`.
        """
        max_depth, _, _ = _find_arg_depth(self.path, arg_name)

        parents = OrderedDict()
        for depth, name in self._open_arg_items():
            if depth <= max_depth:
                parents[name] = depth
        return parents
469
470
    def _args_open_parents(self, arg_names):
        """ Return the open arguments that precede the right-most member of
        `arg_names` and are not themselves in `arg_names`.

        Parameters
        ----------
        arg_names: Sequence[str]

        Returns
        -------
        rem_deps: list of str
            In the same order as the open arguments; empty if none of
            `arg_names` is an open argument.
        """
        # walk the open arguments from right to left, marking the requested ones
        marked = [(name, name in arg_names) for name in reversed(list(self.open_args()))]

        hits = [pos for pos, (_, requested) in enumerate(marked) if requested]
        if not hits:
            return []

        # everything after the first hit (i.e. to its left in the path) is a candidate
        before = marked[hits[0] + 1:]
        return [name for name, requested in reversed(before) if not requested]
490
491
    def values_map(self, arg_name='', check_exists=False):
        """ Return the sorted records of argument values from the first open
        argument until `arg_name`.

        Parameters
        ----------
        arg_name: str
            If empty will pick the name of the last open argument of the Crumb.

        check_exists: bool
            If True only the records whose built Crumb reports `exists()` are kept.

        Returns
        -------
        values_map: list of lists of 2-tuples
            Each record is a list of (arg_name, arg_value) tuples.
        """
        if not arg_name:
            arg_name = self._last_open_arg()[1]

        # each argument is listed on top of the records of the previous ones
        records = None
        for dep_name in self._arg_parents(arg_name):
            records = self._arg_values(dep_name, records)

        if not check_exists:
            return sorted(records)

        crumbs = self.build_paths(records, make_crumbs=True)
        return sorted(rec for rec, crumb in zip(records, crumbs) if crumb.exists())
522
523
    def build_paths(self, values_map, make_crumbs=True):
        """ Build one path for each record of `values_map`.

        Parameters
        ----------
        values_map: list of sequences of 2-tuple
            Example: [[('subject_id', 'haensel'), ('candy', 'lollipop.png')],
                      [('subject_id', 'gretel'),  ('candy', 'jujube.png')],
                     ]

        make_crumbs: bool
            If True each result is a Crumb made with `replace`, otherwise it is
            whatever `_build_path` returns for the record.
            Default: True.

        Returns
        -------
        paths: list of str or list of Crumb
        """
        if not make_crumbs:
            return [_build_path(self.path, arg_values=dict(record)) for record in values_map]

        return [self.replace(**dict(record)) for record in values_map]
545
546
    def ls(self, arg_name='', fullpath=True, make_crumbs=True, check_exists=False):
        """ Return the sorted list of values for the argument crumb `arg_name`.
        Any open argument that appears before it in the path is unfolded too.

        Parameters
        ----------
        arg_name: str
            Name of the argument crumb to be unfolded.
            If empty will pick the name of the last open argument of the Crumb.

        fullpath: bool
            If True the results are full paths made with `build_paths`.
            If False only the values of `arg_name` are returned.

        make_crumbs: bool
            If True (and `fullpath` is True) each result is a Crumb.

        check_exists: bool
            Forwarded to `values_map`.

        Returns
        -------
        values: list of Crumb or str

        Raises
        ------
        NotImplementedError: if the crumb path is not absolute.

        ValueError: if `make_crumbs` is True while `fullpath` is False.

        Examples
        --------
        >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
        >>> user_folders = cr.ls('user_folder',fullpath=True,make_crumbs=True)
        """
        if not arg_name:
            arg_name = self._last_open_arg()[1]

        self._check_open_args([arg_name])

        # paths that start with an argument cannot be listed (for now)
        if not self.isabs():
            raise NotImplementedError("Cannot list paths that start with an argument. "
                                      "If this is a relative path, use the `abspath()` member function.")

        if make_crumbs and not fullpath:
            raise ValueError("`make_crumbs` can only work if `fullpath` is also True.")

        records = self.values_map(arg_name, check_exists=check_exists)

        if not fullpath:
            return sorted(dict(record)[arg_name] for record in records)

        return sorted(self.build_paths(records, make_crumbs=make_crumbs))
603
604
    def touch(self, exist_ok=True):
        """ Call `_touch` on the current crumb path and return its result.

        NOTE(review): presumably this creates the leaf directory and all the
        intermediate ones of the non crumbed part of the path, raising an IOError
        if the target exists and `exist_ok` is False -- confirm against `_touch`.

        Parameters
        ----------
        exist_ok: bool
            Default = True

        Returns
        -------
        nupath: str
            The value returned by `_touch` (presumably the new path created).
        """
        return _touch(self.path, exist_ok=exist_ok)
622
623
    def joinpath(self, suffix):
        """ Return a copy of the current crumb with the `suffix` path appended.
        If suffix has crumb arguments, the whole crumb will be updated.

        The new Crumb keeps the ignore list and the regex method of `self`, so the
        patterns in the path are still read with the same matching rules.

        Parameters
        ----------
        suffix: str

        Returns
        -------
        cr: Crumb
        """
        return Crumb(op.join(self.path, suffix),
                     ignore_list=deepcopy(self._ignore),  # do not share the mutable list
                     regex=self._re_method)
635
636
    def exists(self):
        """ Tell whether the current crumb path is a possibly existing path.

        Without crumb arguments the path itself is tested (a link counts as
        existing). Otherwise the leading part of the path must exist and at
        least one of the listed candidates must pass `_split_exists`.

        Returns
        -------
        exists: bool
        """
        if not has_crumbs(self.path):
            as_text = str(self)
            return op.exists(as_text) or op.islink(as_text)

        if not op.exists(self.split()[0]):
            return False

        last_name = self._last_open_arg()[1]
        candidates = self.ls(last_name, fullpath=True, make_crumbs=False, check_exists=False)

        checks = [_split_exists(candidate) for candidate in candidates]
        return any(checks)
654
655
    def has_files(self):
        """ Return True if the current crumb path has any file in its
        possible paths.

        A crumb without open arguments has only one possible path, itself, so
        in that case the answer is whether that path is a file.

        Returns
        -------
        has_files: bool
        """
        # nothing to unfold: `_last_open_arg` would return None and the
        # unpacking below would raise a TypeError
        if not list(self.open_args()):
            return op.isfile(str(self))

        if not op.exists(self.split()[0]):
            return False

        _, last = self._last_open_arg()
        paths = self.ls(last, fullpath=True, make_crumbs=True, check_exists=True)

        return any(op.isfile(str(lp)) for lp in paths)
669
670
    def unfold(self):
        """ List the existing paths until the last open crumb argument.

        If there are no open arguments left, the list holds only `self`.

        Returns
        -------
        paths: list of Crumb
        """
        if not list(self.open_args()):
            return [self]

        last_name = self._last_open_arg()[1]
        return self.ls(last_name, fullpath=True, make_crumbs=True, check_exists=True)
681
682
    def __getitem__(self, arg_name):
683
        """ Return the existing values of the crumb argument `arg_name`
684
        without removing duplicates.
685
        Parameters
686
        ----------
687
        arg_name: str
688
689
        Returns
690
        -------
691
        values: list of str
692
        """
693
        if arg_name in self._argval:
694
            return [self._argval[arg_name]]
695
        else:
696
            return self.ls(arg_name, fullpath=False, make_crumbs=False, check_exists=True)
697
698
    def __setitem__(self, key, value):
        """ Set the crumb argument `key` to `value` through `update`.

        Raises
        ------
        KeyError: if `_has_arg` does not find `key` in the current path.
        """
        if _has_arg(self.path, arg_name=key):
            self.update(**{key: value})
            return

        raise KeyError("Expected `arg_name` to be one of ({}),"
                       " got {}.".format(list(self.open_args()), key))
703
704
    # The ordering methods compare the rendered paths of both operands.
    # Comparing the stored template of one side against the rendered path of
    # the other would give an asymmetric order, useless for sorting.

    def __ge__(self, other):
        """ Return True if the path string of `self` is >= `str(other)`."""
        return str(self) >= str(other)

    def __le__(self, other):
        """ Return True if the path string of `self` is <= `str(other)`."""
        return str(self) <= str(other)

    def __gt__(self, other):
        """ Return True if the path string of `self` is > `str(other)`."""
        return str(self) > str(other)

    def __lt__(self, other):
        """ Return True if the path string of `self` is < `str(other)`."""
        return str(self) < str(other)
715
716
    def __hash__(self):
717
        return self._path.__hash__()
718
719
    def __contains__(self, arg_name):
        """ Return True if `arg_name` is in the result of `all_args()`."""
        return arg_name in self.all_args()
721
722
    def __repr__(self):
        """ Return the class name followed by the current path, quoted and in parentheses."""
        class_name = type(self).__name__
        return '{}("{}")'.format(class_name, self.path)
724
725
    def __str__(self):
        """ Return the current crumb path string (same as `self.path`)."""
        return self.path
727
728
    def __eq__(self, other):
729
        """ Return True if `self` and `other` are equal, False otherwise.
730
        Parameters
731
        ----------
732
        other: Crumb
733
734
        Returns
735
        -------
736
        is_equal: bool
737
        """
738
        if self._path != other._path:
739
            return False
740
741
        if self._argval != other._argval:
742
            return False
743
744
        if self._ignore != other._ignore:
745
            return False
746
747
        return True
748