Completed
Push — master ( 93886c...85a4f1 )
by Alexandre M.
55s
created

hansel.Crumb.__eq__()   B

Complexity

Conditions 5

Size

Total Lines 23

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 5
dl 0
loc 23
rs 8.2508
1
# -*- coding: utf-8 -*-
2
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3
# vi: set ft=python sts=4 ts=4 sw=4 et:
4
"""
5
Crumb class: the smart path model class.
6
"""
7
8
import os.path     as op
9
from   copy        import deepcopy
10
from   collections import OrderedDict, Mapping, Sequence
11
from   pathlib     import Path
12
from   functools   import partial
13
14
from   six import string_types
15
16
from   .utils import list_children
17
from   ._utils import (_get_path, _arg_name,
18
                       _is_crumb_arg, _replace,
19
                       _split_exists, _split,
20
                       _touch, has_crumbs, is_valid,
21
                       #_arg_format,
22
                       )
23
24
25
class Crumb(object):
    """ Smart path model: a path string that may hold crumb arguments.

    A crumb argument is a path component delimited by the two symbols in
    `_start_end_syms`, e.g. `{subject_id}`.

    Parameters
    ----------
    crumb_path: str
        A file or folder path with crumb arguments. See Examples.

    ignore_list: sequence of str
        A list of `fnmatch` patterns of filenames to be ignored.

    Examples
    --------
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
    """
    # opening and closing symbols of a crumb argument
    _start_end_syms = ('{', '}')

    # The `_utils` helpers take the delimiters as the `start_end_syms` keyword.
    # They are bound here once so the methods never have to pass them around;
    # hardcoding the symbols would be simpler, but this keeps them configurable.
    _split_exists = partial(_split_exists, start_end_syms=_start_end_syms)
    _touch        = partial(_touch,        start_end_syms=_start_end_syms)
    _split        = partial(_split,        start_end_syms=_start_end_syms)
    _replace      = partial(_replace,      start_end_syms=_start_end_syms)
    has_crumbs    = partial(has_crumbs,    start_end_syms=_start_end_syms)
    is_valid      = partial(is_valid,      start_end_syms=_start_end_syms)
    _arg_name     = partial(_arg_name,     start_end_syms=_start_end_syms)
    _is_crumb_arg = partial(_is_crumb_arg, start_end_syms=_start_end_syms)
55
56
57
    def __init__(self, crumb_path, ignore_list=()):
        """ Store the crumb path and the ignore patterns, then parse the path.

        Parameters
        ----------
        crumb_path: str
            A file or folder path with crumb arguments.

        ignore_list: sequence of str
            Patterns handed to `list_children` as its `ignore` argument.
        """
        self._ignore = ignore_list
        self._path = _get_path(crumb_path)
        # placeholders only: `_update` re-creates both containers before parsing
        self._argidx, self._argval = OrderedDict(), {}
        self._update()
63
64
    @property
65
    def path(self):
66
        """Return the current crumb path string."""
67
        return self._path
68
69
    @path.setter
70
    def path(self, value):
71
        """ Set the current crumb path string and updates the internal members.
72
        Parameters
73
        ----------
74
        value: str
75
            A file or folder path with crumb arguments. See Examples in class docstring.
76
        """
77
        self._path = value
78
        self._update()
79
80
    def _check(self):
81
        if not self.is_valid(self._path):
82
            raise ValueError("The current crumb path has errors, got {}.".format(self.path))
83
84
    def _update(self):
85
        """ Clean up, parse the current crumb path and fill the internal
86
        members for functioning."""
87
        self._clean()
88
        self._check()
89
        self._set_argidx()
90
        # self._set_replace_func()
91
92
    def _clean(self):
93
        """ Clean up the private utility members, i.e., _argidx. """
94
        self._argidx = OrderedDict()
95
        self._argval = {}
96
97
    @classmethod
98
    def copy(cls, crumb):
99
        """ Return a deep copy of the given `crumb`.
100
        Parameters
101
        ----------
102
        crumb: str or Crumb
103
104
        Returns
105
        -------
106
        copy: Crumb
107
        """
108
        if isinstance(crumb, cls):
109
            nucr = cls(crumb._path, ignore_list=crumb._ignore)
110
            nucr._argval = deepcopy(crumb._argval)
111
            return nucr
112
        elif isinstance(crumb, string_types):
113
            return cls.from_path(crumb)
114
        else:
115
            raise TypeError("Expected a Crumb or a str to copy, got {}.".format(type(crumb)))
116
117
    def _set_argidx(self):
118
        """ Initialize the self._argidx dict. It holds arg_name -> index.
119
        The index is the position in the whole `_path.split(op.sep)` where each argument is.
120
        """
121
        fs = self._path_split()
122
        for idx, f in enumerate(fs):
123
            if self._is_crumb_arg(f):
124
                self._argidx[self._arg_name(f)] = idx
125
126
    def _find_arg(self, arg_name):
127
        """ Return the index in the current path of the crumb
128
        argument with name `arg_name`.
129
        """
130
        return self._argidx.get(arg_name, -1)
131
132
    def isabs(self):
133
        """ Return True if the current crumb path has an
134
        absolute path, False otherwise.
135
        This means that if it is valid and does not start with a `op.sep` character
136
        or hard disk letter.
137
        """
138
        if not self.is_valid(self._path):
139
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))
140
141
        start_sym, _ = self._start_end_syms
142
        subp = self._path.split(start_sym)[0]
143
        return op.isabs(subp)
144
145
    def abspath(self, first_is_basedir=False):
146
        """ Return a copy of `self` with an absolute crumb path.
147
        Add as prefix the absolute path to the current directory if the current
148
        crumb is not absolute.
149
        Parameters
150
        ----------
151
        first_is_basedir: bool
152
            If True and the current crumb path starts with a crumb argument and first_is_basedir,
153
            the first argument will be replaced by the absolute path to the current dir,
154
            otherwise the absolute path to the current dir will be added as a prefix.
155
156
157
        Returns
158
        -------
159
        abs_crumb: Crumb
160
        """
161
        if not self.is_valid(self._path):
162
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))
163
164
        if self.isabs():
165
            return deepcopy(self)
166
167
        return self.copy(self._abspath(first_is_basedir=first_is_basedir))
168
169
    def _path_split(self):
170
        return self._path.split(op.sep)
171
172
    def _abspath(self, first_is_basedir=False):
173
        """ Return the absolute path of the current crumb path.
174
        Parameters
175
        ----------
176
        first_is_basedir: bool
177
            If True and the current crumb path starts with a crumb argument and first_is_basedir,
178
            the first argument will be replaced by the absolute path to the current dir,
179
            otherwise the absolute path to the current dir will be added as a prefix.
180
181
182
        Returns
183
        -------
184
        abspath: str
185
        """
186
        if not self.has_crumbs(self._path):
187
             return op.abspath(self._path)
188
189
        splt = self._path_split()
190
        path = []
191
        if self._is_crumb_arg(splt[0]):
192
            path.append(op.abspath(op.curdir))
193
194
        if not first_is_basedir:
195
            path.append(splt[0])
196
197
        if splt[1:]:
198
            path.extend(splt[1:])
199
200
        return op.sep.join(path)
201
202
    def split(self):
203
        """ Return a list of sub-strings of the current crumb path where the
204
            path parts are separated from the crumb arguments.
205
206
        Returns
207
        -------
208
        crumbs: list of str
209
        """
210
        return self._split(self._path)
211
212
    @classmethod
213
    def from_path(cls, crumb_path):
214
        """ Create an instance of Crumb out of `crumb_path`.
215
        Parameters
216
        ----------
217
        val: str or Crumb or pathlib.Path
218
219
        Returns
220
        -------
221
        path: Crumb
222
        """
223
        if isinstance(crumb_path, (cls, Path)):
224
            return cls.copy(crumb_path)
225
226
        if isinstance(crumb_path, string_types):
227
            return cls(crumb_path)
228
        else:
229
            raise TypeError("Expected a `val` to be a `str`, got {}.".format(type(crumb_path)))
230
231
    def _lastarg(self):
232
        """ Return the name and idx of the last argument."""
233
        for arg, idx in reversed(list(self._argidx.items())):
234
            return arg, idx
235
236
    def _firstarg(self):
237
        """ Return the name and idx of the first argument."""
238
        for arg, idx in self._argidx.items():
239
            return arg, idx
240
241
    def _is_firstarg(self, arg_name):
242
        """ Return True if `arg_name` is the first argument."""
243
        # Take into account that self._argidx is OrderedDict
244
        return arg_name == self._firstarg()[0]
245
246
    def _arg_values(self, arg_name, arg_values=None):
247
        """ Return the existing values in the file system for the crumb argument
248
        with name `arg_name`.
249
        The `arg_values` must be a sequence with the tuples with valid values of the dependent
250
        (previous in the path) crumb arguments.
251
        The format of `arg_values` work in such a way that `self._path.format(dict(arg_values[0]))`
252
        would give me a valid path or crumb.
253
        Parameters
254
        ----------
255
        arg_name: str
256
257
        arg_values: list of tuples
258
259
        Returns
260
        -------
261
        vals: list of tuples
262
263
        Raises
264
        ------
265
        ValueError: if `arg_values` is None and `arg_name` is not the
266
        first crumb argument in self._path
267
268
        IOError: if this crosses to any path that is non-existing.
269
        """
270
        if arg_values is None and not self._is_firstarg(arg_name):
271
            raise ValueError("Cannot get the list of values for {} if"
272
                             " the previous arguments are not filled"
273
                             " in `paths`.".format(arg_name))
274
275
        aidx = self._find_arg(arg_name)
276
277
        # check if the path is absolute, do it absolute
278
        apath = self._abspath()
279
        splt = apath.split(op.sep)
280
281
        if aidx == len(splt) - 1:  # this means we have to list files too
282
            just_dirs = False
283
        else:  # this means we have to list folders
284
            just_dirs = True
285
286
        vals = []
287
        if arg_values is None:
288
            base = op.sep.join(splt[:aidx])
289
            vals = [[(arg_name, val)] for val in list_children(base, just_dirs=just_dirs, ignore=self._ignore)]
290
        else:
291
            for aval in arg_values:
292
                #  create the part of the crumb path that is already specified
293
                path = self._split(self._replace(self._path, **dict(aval)))[0]
294
295
                #  list the children of `path`
296
                subpaths = list_children(path, just_dirs=just_dirs, ignore=self._ignore)
297
298
                #  extend `val` tuples with the new list of values for `aval`
299
                vals.extend([aval + [(arg_name, sp)] for sp in subpaths])
300
301
        return vals
302
303
    def setitems(self, **kwargs):
304
        """ Set the crumb arguments in path to the given values in kwargs and updates
305
        self accordingly.
306
        Parameters
307
        ----------
308
        kwargs: strings
309
310
        Returns
311
        -------
312
        crumb: Crumb
313
        """
314
        for arg_name in kwargs:
315
            if arg_name not in self._argidx:
316
                raise KeyError("Expected `arg_name` to be one of ({}),"
317
                               " got {}.".format(list(self._argidx), arg_name))
318
319
        self.path = self._replace(self._path, **kwargs)
320
        argval = deepcopy(self._argval)
321
322
        self._update()
323
        self._argval = argval
324
        self._argval.update(**kwargs)
325
        return self
326
327
    def replace(self, **kwargs):
328
        """ Return a copy of self with the crumb arguments in
329
        `kwargs` replaced by its values.
330
        Parameters
331
        ----------
332
        kwargs: strings
333
334
        Returns
335
        -------
336
        crumb:
337
        """
338
        cr = self.copy(self)
339
        return cr.setitems(**kwargs)
340
341
    def _arg_deps(self, arg_name):
342
        """ Return a subdict of `self._argidx` with the
343
         values from the crumb arguments that come before
344
         `arg_name` in the crumb path.
345
        Parameters
346
        ----------
347
        arg_name: str
348
349
        Returns
350
        -------
351
        arg_deps: Mapping[str, int]
352
        """
353
        argidx = self._find_arg(arg_name)
354
        return OrderedDict([(arg, idx) for arg, idx in self._argidx.items() if idx <= argidx])
355
356
    def values_map(self, arg_name, check_exists=False):
357
        """ Return a list of tuples of crumb arguments with their values.
358
359
        Parameters
360
        ----------
361
        arg_name: str
362
363
        check_exists: bool
364
365
        Returns
366
        -------
367
        values_map: list of lists of 2-tuples
368
        """
369
        arg_deps = self._arg_deps(arg_name)
370
        values_map = None
371
        for arg in arg_deps:
372
            values_map = self._arg_values(arg, values_map)
373
374
        if check_exists:
375
            paths = [self.from_path(path) for path in self._build_paths(values_map)]
376
            values_map_checked = [args for args, path in zip(values_map, paths) if path.exists()]
377
        else:
378
            values_map_checked = values_map
379
380
        return values_map_checked
381
382
    def _build_paths(self, values_map, make_crumbs=False):
383
        """ Return a list of paths from each tuple of args from `values_map`
384
        Parameters
385
        ----------
386
        values_map: list of sequences of 2-tuple
387
388
        make_crumbs: bool
389
            If `make_crumbs` is True will create a Crumb for
390
            each element of the result.
391
392
        Returns
393
        -------
394
        paths: list of str or list of Crumb
395
        """
396
        if make_crumbs:
397
            return [self.replace(**dict(val)) for val in values_map]
398
        else:
399
            return [self._replace(self._path, **dict(val)) for val in values_map]
400
401
    def ls(self, arg_name, fullpath=True, make_crumbs=True, check_exists=False):
402
        """ Return the list of values for the argument crumb `arg_name`.
403
        This will also unfold any other argument crumb that appears before in the
404
        path.
405
        Parameters
406
        ----------
407
        arg_name: str
408
            Name of the argument crumb to be unfolded.
409
410
        fullpath: bool
411
            If True will build the full path of the crumb path, will also append
412
            the rest of crumbs not unfolded.
413
            If False will only return the values for the argument with name
414
            `arg_name`.
415
416
        make_crumbs: bool
417
            If `fullpath` and `make_crumbs` is True will create a Crumb for
418
            each element of the result.
419
420
        check_exists: bool
421
            If True will return only str, Crumb or Path if it exists
422
            in the file path, otherwise it may create file paths
423
            that don't have to exist.
424
425
        Returns
426
        -------
427
        values: list of str or Crumb
428
429
        Examples
430
        --------
431
        >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
432
        >>> user_folders = cr.ls('user_folder',fullpath=True,make_crumbs=True)
433
        """
434
        if arg_name not in self._argidx:
435
            raise ValueError("Expected `arg_name` to be one of ({}),"
436
                             " got {}.".format(tuple(self._argidx) + tuple(self._argval),
437
                                               arg_name))
438
439
        start_sym, _ = self._start_end_syms
440
441
        # if the first chunk of the path is a parameter, I am not interested in this (for now)
442
        if self._path.startswith(start_sym):
443
            raise NotImplementedError("Can't list paths that start with an argument.")
444
445
        if make_crumbs and not fullpath:
446
            raise ValueError("`make_crumbs` can only work if `fullpath` is also True.")
447
448
        values_map = self.values_map(arg_name, check_exists=check_exists)
449
450
        if fullpath:
451
            paths = sorted(self._build_paths(values_map, make_crumbs=make_crumbs))
452
453
        else:
454
            paths = [dict(val)[arg_name] for val in values_map]
455
456
        return paths
457
458
    def _remaining_deps(self, arg_names):
459
        """ Return the name of the arguments that are dependencies of `arg_names`.
460
        Parameters
461
        ----------
462
        arg_names: Sequence[str]
463
464
        Returns
465
        -------
466
        rem_deps: Sequence[str]
467
        """
468
        started = False
469
        rem_deps = []
470
        for an in reversed(list(self._argidx.keys())):  # take into account that argidx is ordered
471
            if an in arg_names:
472
                started = True
473
            else:
474
                if started:
475
                    rem_deps.append(an)
476
477
        return rem_deps
478
479
    def touch(self):
480
        """ Create a leaf directory and all intermediate ones using the non
481
        crumbed part of `crumb_path`.
482
        If the target directory already exists, raise an IOError if exist_ok
483
        is False. Otherwise no exception is raised.
484
        Parameters
485
        ----------
486
        crumb_path: str
487
488
        exist_ok: bool
489
            Default = True
490
491
        Returns
492
        -------
493
        nupath: str
494
            The new path created.
495
        """
496
        return self._touch(self._path)
497
498
    def joinpath(self, suffix):
        """ Return a new Crumb whose path is the current crumb path joined
        with `suffix` by `op.join`.

        The new crumb is parsed from scratch, so any crumb argument in
        `suffix` becomes an argument of the result.

        Parameters
        ----------
        suffix: str

        Returns
        -------
        cr: Crumb
        """
        joined = op.join(self._path, suffix)
        return Crumb(joined)
510
511
    def exists(self):
512
        """ Return True if the current crumb path is a possibly existing path,
513
        False otherwise.
514
        Returns
515
        -------
516
        exists: bool
517
        """
518
        if not self.has_crumbs(self._path):
519
            return op.exists(str(self)) or op.islink(str(self))
520
521
        if not op.exists(self.split()[0]):
522
            return False
523
524
        last, _ = self._lastarg()
525
        paths = self.ls(last, fullpath=True, make_crumbs=False, check_exists=False)
526
527
        return all([self._split_exists(lp) for lp in paths])
528
529
    def has_files(self):
530
        """ Return True if the current crumb path has any file in its
531
        possible paths.
532
        Returns
533
        -------
534
        has_files: bool
535
        """
536
        if not op.exists(self.split()[0]):
537
            return False
538
539
        last, _ = self._lastarg()
540
        paths = self.ls(last, fullpath=True, make_crumbs=True, check_exists=True)
541
542
        return any([op.isfile(str(lp)) for lp in paths])
543
544
    def unfold(self):
545
        """ Return a list of all the existing paths until the last crumb argument.
546
        Returns
547
        -------
548
        paths: list of pathlib.Path
549
        """
550
        return self.ls(self._lastarg()[0], fullpath=True, make_crumbs=True, check_exists=True)
551
552
    def __getitem__(self, arg_name):
553
        """ Return the existing values of the crumb argument `arg_name`
554
        without removing duplicates.
555
        Parameters
556
        ----------
557
        arg_name: str
558
559
        Returns
560
        -------
561
        values: list of str
562
        """
563
        if arg_name in self._argval:
564
            return self._argval[arg_name]
565
        else:
566
            return self.ls(arg_name, fullpath=False, make_crumbs=False, check_exists=True)
567
568
    def __setitem__(self, key, value):
569
        if key not in self._argidx:
570
            raise KeyError("Expected `arg_name` to be one of ({}),"
571
                           " got {}.".format(list(self._argidx), key))
572
        _ = self.setitems(**{key: value})
573
574
    def __ge__(self, other):
575
        return self._path >= str(other)
576
577
    def __le__(self, other):
578
        return self._path <= str(other)
579
580
    def __gt__(self, other):
581
        return self._path > str(other)
582
583
    def __lt__(self, other):
584
        return self._path < str(other)
585
586
    def __hash__(self):
587
        return self._path.__hash__()
588
589
    def __contains__(self, item):
590
        return item in self._argidx
591
592
    def __repr__(self):
593
        return '{}("{}")'.format(__class__.__name__, self._path)
594
595
    def __str__(self):
596
        return str(self._path)
597
598
    def __eq__(self, other):
599
        """ Return True if `self` and `other` are equal, False otherwise.
600
        Parameters
601
        ----------
602
        other: Crumb
603
604
        Returns
605
        -------
606
        is_equal: bool
607
        """
608
        if self._path != other._path:
609
            return False
610
611
        if self._argidx != other._argidx:
612
            return False
613
614
        if self._argval != other._argval:
615
            return False
616
617
        if self._ignore != other._ignore:
618
            return False
619
620
        return True
621
622