Completed
Push — master ( eb9dd6...a22857 )
by Alexandre M.
02:03
created

hansel.Crumb.__gt__()   A

Complexity

Conditions 1

Size

Total Lines 2

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 2
rs 10
1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Crumb class: the smart path model class.
6
"""
7
8
import os.path     as op
9
from   copy        import deepcopy
10
from   collections import OrderedDict, Mapping, Sequence
11
from   pathlib     import Path
12
from   functools   import partial
13
14
from   six import string_types
15
16
from   .utils import remove_duplicates, list_children
17
from   ._utils import (_get_path, _arg_name,
18
                       _is_crumb_arg, _replace,
19
                       _split_exists, _split,
20
                       _touch, has_crumbs, is_valid,
21
                       #_arg_format,
22
                       )
23
24
25
class Crumb(object):
26
    """ The crumb path model class.
27
    Parameters
28
    ----------
29
    crumb_path: str
30
        A file or folder path with crumb arguments. See Examples.
31
32
    ignore_list: sequence of str
33
        A list of `fnmatch` patterns of filenames to be ignored.
34
35
    Examples
36
    --------
37
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
38
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
39
    """
40
    # Delimiters that open and close a crumb argument inside a path string.
    _arg_start_sym = '{'
    _arg_end_sym   = '}'

    # Bind the helpers from `_utils` to the delimiters above, so the rest of
    # the class can call them without repeating `start_sym`/`end_sym`.
    # Everything would be much simpler with hardcoded symbols, but this
    # flexibility is nice to have.
    # NOTE: `functools.partial` objects stored on a class are not turned into
    # bound methods on attribute look-up, hence `self._replace(path, ...)`
    # passes `path` (not `self`) as the first positional argument.
    # _arg_format   = partial(_arg_format,     start_sym=_arg_start_sym, end_sym=_arg_end_sym)
    _is_crumb_arg = partial(_is_crumb_arg, start_sym=_arg_start_sym, end_sym=_arg_end_sym)
    _arg_name     = partial(_arg_name,     start_sym=_arg_start_sym, end_sym=_arg_end_sym)
    is_valid      = partial(is_valid,      start_sym=_arg_start_sym, end_sym=_arg_end_sym)
    has_crumbs    = partial(has_crumbs,      start_sym=_arg_start_sym, end_sym=_arg_end_sym)
    _replace      = partial(_replace,      start_sym=_arg_start_sym, end_sym=_arg_end_sym)
    _split        = partial(_split,      start_sym=_arg_start_sym, end_sym=_arg_end_sym)
    _touch        = partial(_touch,      start_sym=_arg_start_sym, end_sym=_arg_end_sym)
    _split_exists = partial(_split_exists,      start_sym=_arg_start_sym, end_sym=_arg_end_sym)
55
56
57
    def __init__(self, crumb_path, ignore_list=()):
        """ Store the crumb path and the ignore list, then validate the path
        and index its crumb arguments.

        Parameters
        ----------
        crumb_path: str
            A file or folder path with crumb arguments. It is passed
            through `_get_path` before being stored.

        ignore_list: sequence of str
            A list of `fnmatch` patterns of filenames to be ignored.
        """
        self._ignore = ignore_list
        self._argidx = OrderedDict()
        self._path = _get_path(crumb_path)
        # validates `_path` and fills `_argidx`
        self._update()
62
63
    @property
    def path(self):
        """ str: the crumb path string currently held by this object."""
        return self._path
67
68
    @path.setter
    def path(self, value):
        """ Set the current crumb path string and update the internal members.

        If the update fails (e.g. `value` is not a valid crumb path), the
        previous path and argument index are restored before the error is
        re-raised, so a failed assignment does not leave the crumb
        half-updated.

        Parameters
        ----------
        value: str
            A file or folder path with crumb arguments. See Examples in class docstring.

        Raises
        ------
        ValueError: if `value` is not a valid crumb path.
        """
        old_path, old_argidx = self._path, self._argidx
        self._path = value
        try:
            self._update()
        except Exception:
            # `_update` already emptied `_argidx`; put both members back
            self._path, self._argidx = old_path, old_argidx
            raise
78
79
    def _check(self):
        """ Raise a ValueError if the current crumb path is not valid."""
        if self.is_valid(self._path):
            return

        raise ValueError("The current crumb path has errors, got {}.".format(self.path))
82
83
    def _update(self):
        """ Reset the argument index, validate the current crumb path and
        index its arguments again, in that order."""
        for step in (self._clean, self._check, self._set_argidx):
            step()
90
91
    def _clean(self):
        """ Reset the private utility members: `_argidx` is replaced
        by a new, empty OrderedDict. """
        self._argidx = OrderedDict()
94
95
    @classmethod
    def copy(cls, crumb):
        """ Return a new object built from the given `crumb`.

        Parameters
        ----------
        crumb: str or Crumb
            A Crumb is rebuilt from its path and its ignore list;
            a str is handed over to `from_path`.

        Returns
        -------
        copy: Crumb
            NOTE(review): for a str, the result is whatever `from_path`
            returns, which is a pathlib.Path when the string has no crumbs.

        Raises
        ------
        TypeError: if `crumb` is neither a Crumb nor a str.
        """
        if isinstance(crumb, cls):
            return cls(crumb._path, ignore_list=crumb._ignore)

        if not isinstance(crumb, string_types):
            raise TypeError("Expected a Crumb or a str to copy, got {}.".format(type(crumb)))

        return cls.from_path(crumb)
112
113
    def _set_argidx(self):
        """ Fill the self._argidx dict, which maps arg_name -> index.
        The index is the position of the argument among the chunks of
        `_path.split(op.sep)`. If a name appears more than once, the last
        position is the one kept.
        """
        for position, chunk in enumerate(self._path_split()):
            if not self._is_crumb_arg(chunk):
                continue
            self._argidx[self._arg_name(chunk)] = position
121
122
    def _find_arg(self, arg_name):
        """ Return the position of the crumb argument named `arg_name`
        in the current path, or -1 if there is no such argument.
        """
        try:
            return self._argidx[arg_name]
        except KeyError:
            return -1
127
128
    def isabs(self):
        """ Return True if the current crumb path is absolute, False otherwise.
        The decision is taken on the part of the path that comes before the
        first crumb argument start symbol, using `os.path.isabs`.

        Raises
        ------
        ValueError: if the current crumb path is not valid.
        """
        if not self.is_valid(self._path):
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))

        # everything before the first argument start symbol
        prefix, _, _ = self._path.partition(self._arg_start_sym)
        return op.isabs(prefix)
139
140
    def abspath(self, first_is_basedir=False):
        """ Return a copy of `self` with an absolute crumb path.
        Add as prefix the absolute path to the current directory if the current
        crumb is not absolute.

        The ignore list of `self` is carried over to the returned crumb in
        both branches (already absolute or not).

        Parameters
        ----------
        first_is_basedir: bool
            If True and the current crumb path starts with a crumb argument,
            the first argument will be replaced by the absolute path to the current dir,
            otherwise the absolute path to the current dir will be added as a prefix.

        Returns
        -------
        abs_crumb: Crumb
            NOTE(review): `copy` goes through `from_path`, so a path without
            crumb arguments comes back as a pathlib.Path.

        Raises
        ------
        ValueError: if the current crumb path is not valid.
        """
        if not self.is_valid(self._path):
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))

        if self.isabs():
            return deepcopy(self)

        abs_crumb = self.copy(self._abspath(first_is_basedir=first_is_basedir))
        # `copy` of a str builds the object without the ignore list
        if isinstance(abs_crumb, Crumb):
            abs_crumb._ignore = self._ignore
        return abs_crumb
163
164
    def _path_split(self):
        """ Return the chunks of the current crumb path, split at `op.sep`."""
        separator = op.sep
        return self._path.split(separator)
166
167
    def _abspath(self, first_is_basedir=False):
        """ Return the absolute path of the current crumb path.

        A path without crumbs is resolved with `os.path.abspath`. Otherwise
        the current directory is prepended only when the first chunk of the
        path is a crumb argument.

        NOTE(review): a relative path whose first chunk is NOT a crumb
        argument (e.g. `raw/{x}`) is returned unchanged. `_arg_values` uses
        the chunk positions of the result, so change both together.

        Parameters
        ----------
        first_is_basedir: bool
            If True and the current crumb path starts with a crumb argument,
            the first argument will be replaced by the absolute path to the current dir,
            otherwise the absolute path to the current dir will be added as a prefix.
            It has no effect when the first chunk is not a crumb argument.

        Returns
        -------
        abspath: str
        """
        if not self.has_crumbs(self._path):
            return op.abspath(self._path)

        chunks = self._path_split()
        first_is_arg = self._is_crumb_arg(chunks[0])

        abs_chunks = []
        if first_is_arg:
            abs_chunks.append(op.abspath(op.curdir))

        # Only a leading crumb argument can be substituted by the base dir;
        # any other first chunk (including the empty one of an absolute path)
        # belongs to the path and must be kept.
        if not (first_is_basedir and first_is_arg):
            abs_chunks.append(chunks[0])

        abs_chunks.extend(chunks[1:])
        return op.sep.join(abs_chunks)
196
197
    def split(self):
        """ Split the current crumb path with the `_split` helper, which
        separates the path parts from the crumb arguments.

        Returns
        -------
        crumbs: list of str
        """
        current = self._path
        return self._split(current)
206
207
    @classmethod
    def from_path(cls, crumb_path):
        """ Create an instance of Crumb or pathlib.Path out of `crumb_path`.
        A string with crumbs becomes a Crumb, a string without crumbs becomes
        a pathlib.Path. A Crumb or a pathlib.Path is returned as it is.

        Parameters
        ----------
        crumb_path: str, Crumb or pathlib.Path

        Returns
        -------
        path: Crumb or pathlib.Path

        Raises
        ------
        TypeError: if `crumb_path` is none of the accepted types.
        """
        if isinstance(crumb_path, (cls, Path)):
            return crumb_path

        if not isinstance(crumb_path, string_types):
            raise TypeError("Expected a `val` to be a `str`, got {}.".format(type(crumb_path)))

        return cls(crumb_path) if cls.has_crumbs(crumb_path) else Path(crumb_path)
229
230
    # def _set_replace_func(self):
231
    #     """ Set the fastest replace algorithm depending on how
232
    #     many arguments the path has."""
233
    #     self._replace = self._replace2
234
    #     if len(self._argidx) > 5:
235
    #         self._replace = self._replace1
236
237
    # def _replace2(self, start_sym='{', end_sym='}', **kwargs):
238
    #
239
    #     if start_sym != '{' or end_sym != '}':
240
    #         raise NotImplementedError
241
    #
242
    #     if not kwargs:
243
    #         return self._path
244
    #
245
    #     args = {v: self._arg_format(v) for v in self._argidx}
246
    #
247
    #     for k in kwargs:
248
    #         if k not in args:
249
    #             raise KeyError("Could not find argument {}"
250
    #                            " in `path` {}.".format(k, self._path))
251
    #
252
    #         args[k] = kwargs[k]
253
    #
254
    #     return self._path.format_map(args)
255
256
    def _lastarg(self):
        """ Return the (name, idx) pair of the last argument,
        or None if the crumb has no arguments."""
        pairs = list(self._argidx.items())
        if not pairs:
            return None
        return pairs[-1]
260
261
    def _firstarg(self):
        """ Return the (name, idx) pair of the first argument,
        or None if the crumb has no arguments."""
        return next(iter(self._argidx.items()), None)
265
266
    def _is_firstarg(self, arg_name):
        """ Return True if `arg_name` is the name of the first argument."""
        # self._argidx is an OrderedDict, so its first item is the first argument
        first_name = self._firstarg()[0]
        return arg_name == first_name
270
271
    def _arg_values(self, arg_name, arg_values=None):
        """ Return the existing values in the file system for the crumb argument
        with name `arg_name`.
        The `arg_values` must be a sequence with the tuples with valid values of the dependent
        (previous in the path) crumb arguments.
        Each item of `arg_values` is such that replacing `dict(item)` in
        `self._path` gives a valid path or crumb.

        Parameters
        ----------
        arg_name: str

        arg_values: list of lists of 2-tuples
            One list of (arg_name, value) pairs per combination found so far.

        Returns
        -------
        vals: list of lists of 2-tuples
            The items of `arg_values` extended with one (arg_name, value)
            pair for each child found.

        Raises
        ------
        ValueError: if `arg_values` is None and `arg_name` is not the
        first crumb argument in self._path

        IOError: if this crosses to any path that is non-existing.
        """
        if arg_values is None and not self._is_firstarg(arg_name):
            raise ValueError("Cannot get the list of values for {} if"
                             " the previous arguments are not filled"
                             " in `paths`.".format(arg_name))

        # work on the absolute version of the path
        splt = self._abspath().split(op.sep)

        # `_argidx` holds positions among the chunks of `self._path`, but the
        # absolute path may have extra leading chunks (the current directory),
        # so shift the position before using it on `splt`.
        shift = len(splt) - len(self._path_split())
        aidx = self._find_arg(arg_name) + shift

        # the last chunk can be a file, any other one must be a folder
        just_dirs = aidx != len(splt) - 1

        vals = []
        if arg_values is None:
            base = op.sep.join(splt[:aidx])
            vals = [[(arg_name, val)] for val in list_children(base, just_dirs=just_dirs, ignore=self._ignore)]
        else:
            for aval in arg_values:
                #  create the part of the crumb path that is already specified
                path = self._split(self._replace(self._path, **dict(aval)))[0]

                #  list the children of `path`
                subpaths = list_children(path, just_dirs=just_dirs, ignore=self._ignore)

                #  extend `val` tuples with the new list of values for `aval`
                vals.extend([aval + [(arg_name, sp)] for sp in subpaths])

        return vals
327
328
    def replace(self, **kwargs):
        """ Return a new object whose path is the current crumb path with the
        crumb arguments in `kwargs` replaced by their values. `self` is not
        modified and its ignore list is carried over to the result.

        Parameters
        ----------
        kwargs: strings
            arg_name=value pairs; every arg_name must be an argument of
            this crumb.

        Returns
        -------
        crumb: Crumb or pathlib.Path
            A pathlib.Path is returned when no crumb argument is left.

        Raises
        ------
        KeyError: if any key of `kwargs` is not an argument of this crumb.
        """
        for arg_name in kwargs:
            if arg_name not in self._argidx:
                raise KeyError("Expected `arg_name` to be one of ({}),"
                               " got {}.".format(list(self._argidx), arg_name))

        replaced = Crumb.from_path(self._replace(self._path, **kwargs))
        # `from_path` builds the crumb without the ignore list
        if isinstance(replaced, Crumb):
            replaced._ignore = self._ignore
        return replaced
347
348
    def _arg_deps(self, arg_name):
        """ Return a subdict of `self._argidx` with the crumb arguments
        placed up to and including `arg_name` in the crumb path.
        The result is empty if `arg_name` is not an argument of this crumb.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        arg_deps: Mapping[str, int]
        """
        limit = self._find_arg(arg_name)
        deps = OrderedDict()
        for name, position in self._argidx.items():
            if position <= limit:
                deps[name] = position
        return deps
362
363
    def values_map(self, arg_name, check_exists=False):
        """ Return a list with one list of (arg_name, value) tuples for each
        combination of values found for `arg_name` and the arguments before it.

        Parameters
        ----------
        arg_name: str

        check_exists: bool
            If True, keep only the combinations whose resulting path
            reports that it exists.

        Returns
        -------
        values_map: list of lists of 2-tuples
        """
        values_map = None
        # unfold the arguments in path order, each one on top of the previous
        for dep_name in self._arg_deps(arg_name):
            values_map = self._arg_values(dep_name, values_map)

        if not check_exists:
            return values_map

        candidates = [self.from_path(path) for path in self._build_paths(values_map)]
        return [args for args, candidate in zip(values_map, candidates) if candidate.exists()]
388
389
    def _build_paths(self, values_map):
        """ Return one path for each item of `values_map`, made by replacing
        the arguments of that item in the current crumb path.

        Parameters
        ----------
        values_map: list of sequences of 2-tuple

        Returns
        -------
        paths: list of str
        """
        paths = []
        for arg_pairs in values_map:
            paths.append(self._replace(self._path, **dict(arg_pairs)))
        return paths
400
401
    def ls(self, arg_name, fullpath=True, rm_dups=False, make_crumbs=True, check_exists=False):
        """
        Return the list of values for the argument crumb `arg_name`.
        This will also unfold any other argument crumb that appears before in the
        path.

        Parameters
        ----------
        arg_name: str
            Name of the argument crumb to be unfolded.

        fullpath: bool
            If True will build the full path of the crumb path, keeping
            the crumbs that were not unfolded.
            If False will only return the values for the argument with name
            `arg_name`.

        rm_dups: bool
            If True the result goes through `remove_duplicates`.
            Otherwise it is left as it is.

        make_crumbs: bool
            If True (which requires `fullpath` to be True) each item of the
            result is turned into a Crumb or a pathlib.Path, depending on
            whether it still has crumb arguments, and the result is sorted.

        check_exists: bool
            If True only the items that exist are returned, otherwise
            the result may have file paths that don't exist.

        Returns
        -------
        values: list of str, Crumb or pathlib.Path

        Raises
        ------
        ValueError: if `arg_name` is not an argument of this crumb, or if
        `make_crumbs` is True while `fullpath` is False.

        NotImplementedError: if the crumb path starts with an argument.

        Examples
        --------
        >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
        >>> user_folders = cr.ls('user_folder', fullpath=True, rm_dups=True, make_crumbs=True)
        """
        if arg_name not in self._argidx:
            raise ValueError("Expected `arg_name` to be one of ({}),"
                             " got {}.".format(list(self._argidx), arg_name))

        # paths whose first chunk is an argument are not supported (for now)
        if self._path.startswith(self._arg_start_sym):
            raise NotImplementedError("Can't list paths that starts"
                                      " with an argument.")

        if make_crumbs and not fullpath:
            raise ValueError("`make_crumbs` can only work if `fullpath` is also True.")

        values_map = self.values_map(arg_name, check_exists=check_exists)

        # from here on `make_crumbs` implies `fullpath`
        if fullpath:
            paths = self._build_paths(values_map)
        else:
            paths = [dict(val)[arg_name] for val in values_map]

        if rm_dups:
            paths = remove_duplicates(paths)

        if make_crumbs:
            paths = sorted(self.from_path(path) for path in paths)

        return paths
466
467
    def _remaining_deps(self, arg_names):
        """ Return the names of the arguments that are not in `arg_names` and
        come before the last argument of `arg_names` in the crumb path.
        The names are given from the end of the path to its start.

        Parameters
        ----------
        arg_names: Sequence[str]

        Returns
        -------
        rem_deps: Sequence[str]
        """
        rem_deps = []
        found = False
        # self._argidx is ordered, so walk the arguments backwards
        for name in reversed(list(self._argidx)):
            if name in arg_names:
                found = True
            elif found:
                rem_deps.append(name)

        return rem_deps
487
488
    def touch(self):
        """ Call the `_touch` helper on the current crumb path and return
        its result.

        NOTE(review): the helper is expected to create the leaf directory and
        all intermediate ones of the non crumbed part of the path; confirm it
        in `_utils._touch`. This method takes no arguments, so the `exist_ok`
        option of the helper keeps its default value.

        Returns
        -------
        nupath: str
            The new path created (as reported by `_touch`).
        """
        target = self._path
        return self._touch(target)
506
507
    def joinpath(self, suffix):
        """ Return a new crumb with the `suffix` path appended to the current
        crumb path with `os.path.join`. The ignore list of `self` is kept.
        If suffix has crumb arguments, they become arguments of the new crumb.

        Parameters
        ----------
        suffix: str

        Returns
        -------
        cr: Crumb
        """
        return Crumb(op.join(self._path, suffix), ignore_list=self._ignore)
519
520
    def exists(self):
        """ Return True if the current crumb path is a possibly existing path,
        False otherwise.

        A crumb without indexed arguments is checked directly with
        `os.path.exists`. Otherwise the path is unfolded up to its last
        argument and every resulting path is checked with `_split_exists`.

        NOTE(review): the result is True when the unfolding gives no paths
        at all (`all` of an empty sequence); confirm this is intended.

        Returns
        -------
        exists: bool
        """
        if not self._argidx:
            # nothing to unfold, and `_lastarg()` would return None
            return op.exists(self._path)

        if not op.exists(self.split()[0]):
            return False

        last, _ = self._lastarg()
        paths = self.ls(last,
                        fullpath     = True,
                        make_crumbs  = False,
                        rm_dups      = True,
                        check_exists = False)

        return all(self._split_exists(lp) for lp in paths)
538
539
    def has_files(self):
        """ Return True if the current crumb path has any file in its
        possible paths.

        A crumb without indexed arguments is checked directly with
        `os.path.isfile`. Otherwise the path is unfolded up to its last
        argument and each existing result is checked.

        Returns
        -------
        has_files: bool
        """
        if not self._argidx:
            # nothing to unfold, and `_lastarg()` would return None
            return op.isfile(self._path)

        if not op.exists(self.split()[0]):
            return False

        last, _ = self._lastarg()
        paths = self.ls(last,
                        fullpath     = True,
                        make_crumbs  = True,
                        rm_dups      = False,
                        check_exists = True)

        return any(op.isfile(str(lp)) for lp in paths)
557
558
    def unfold(self):
        """ Return a list of all the existing paths until the last crumb argument.
        It is `ls` on the last argument with full paths, duplicates removed,
        items made into objects and existence checked.

        Returns
        -------
        paths: list of Crumb or pathlib.Path
        """
        last_name = self._lastarg()[0]
        return self.ls(last_name, fullpath=True, rm_dups=True,
                       make_crumbs=True, check_exists=True)
569
570
    def __getitem__(self, arg_name):
        """ Return the existing values of the crumb argument `arg_name`
        without removing duplicates.
        It is `ls` with only the values (no full paths, no objects) and
        with the existence check enabled.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        values: list of str
        """
        options = dict(fullpath=False, rm_dups=False, make_crumbs=False, check_exists=True)
        return self.ls(arg_name, **options)
586
587
    def __setitem__(self, key, value):
        """ Replace in place the crumb argument `key` by `value` and
        update the internal members.

        Raises
        ------
        KeyError: if `key` is not an argument of this crumb.
        """
        known_args = self._argidx
        if key not in known_args:
            raise KeyError("Expected `arg_name` to be one of ({}),"
                           " got {}.".format(list(known_args), key))

        replacement = {key: value}
        self._path = self._replace(self._path, **replacement)
        self._update()
594
595
    def __ge__(self, other):
        """ Compare the crumb path string against `str(other)`."""
        other_path = str(other)
        return self._path >= other_path
597
598
    def __le__(self, other):
        """ Compare the crumb path string against `str(other)`."""
        other_path = str(other)
        return self._path <= other_path
600
601
    def __gt__(self, other):
        """ Compare the crumb path string against `str(other)`."""
        other_path = str(other)
        return self._path > other_path
603
604
    def __lt__(self, other):
        """ Compare the crumb path string against `str(other)`."""
        other_path = str(other)
        return self._path < other_path
606
607
    def __hash__(self):
        """ Return the hash of the crumb path string."""
        return hash(self._path)
609
610
    def __contains__(self, item):
        """ Return True if `item` is the name of an argument of this crumb."""
        known_args = self._argidx
        return item in known_args
612
613
    def __repr__(self):
        """ Return `ClassName("path")`, with the name of the actual class
        of the instance, so subclasses are reported correctly."""
        # `type(self)` also works on Python 2, where a bare `__class__`
        # is not defined inside methods.
        return '{}("{}")'.format(type(self).__name__, self._path)
615
616
    def __str__(self):
        """ Return the crumb path as a string."""
        current = self._path
        return str(current)
618
619
    def __eq__(self, other):
        """ Return True if `self` and `other` are equal, False otherwise.
        Two crumbs are equal when their path, their argument index and
        their ignore list are equal.

        Parameters
        ----------
        other: Crumb
            For any other type NotImplemented is returned, so Python can
            try the reflected comparison and finally fall back to False,
            instead of failing with an AttributeError.

        Returns
        -------
        is_equal: bool
        """
        if not isinstance(other, Crumb):
            return NotImplemented

        if self._path != other._path:
            return False

        if self._argidx != other._argidx:
            return False

        if self._ignore != other._ignore:
            return False

        return True
639
640