Total Complexity | 98 |
Total Lines | 568 |
Duplicated Lines | 0 % |
Complex classes like hansel.Crumb often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
class Crumb(object):
    """ The crumb path model class.

    A crumb path is a file or folder path where some of its components are
    named arguments delimited by `_start_end_syms`, e.g. `{subject_id}`.

    Parameters
    ----------
    crumb_path: str
        A file or folder path with crumb arguments. See Examples.

    ignore_list: sequence of str
        A list of `fnmatch` patterns of filenames to be ignored.

    Examples
    --------
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
    """
    # symbols indicating start and end of a crumb argument
    _start_end_syms = ('{', '}')

    # specify partial functions from _utils bound to `_start_end_syms`.
    # everything would be much simpler if I hardcoded these symbols but I still
    # feel that this flexibility is nice to have.
    # NOTE: `partial` objects are not descriptors, so calling them through
    # `self.` does NOT pass `self` as first argument.
    _is_crumb_arg = partial(_is_crumb_arg, start_end_syms=_start_end_syms)
    _arg_name = partial(_arg_name, start_end_syms=_start_end_syms)
    is_valid = partial(is_valid, start_end_syms=_start_end_syms)
    has_crumbs = partial(has_crumbs, start_end_syms=_start_end_syms)
    _replace = partial(_replace, start_end_syms=_start_end_syms)
    _split = partial(_split, start_end_syms=_start_end_syms)
    _touch = partial(_touch, start_end_syms=_start_end_syms)
    _split_exists = partial(_split_exists, start_end_syms=_start_end_syms)

    def __init__(self, crumb_path, ignore_list=()):
        self._path = _get_path(crumb_path)
        self._argidx = OrderedDict()  # arg_name -> index in `_path_split()`
        self._ignore = ignore_list
        self._update()

    @property
    def path(self):
        """Return the current crumb path string."""
        return self._path

    @path.setter
    def path(self, value):
        """ Set the current crumb path string and update the internal members.

        Parameters
        ----------
        value: str
            A file or folder path with crumb arguments. See Examples in class docstring.

        Raises
        ------
        ValueError: if `value` is not a valid crumb path.
        """
        self._path = value
        self._update()

    def _check(self):
        """ Raise a ValueError if the current crumb path is not valid."""
        if not self.is_valid(self._path):
            raise ValueError("The current crumb path has errors, got {}.".format(self.path))

    def _update(self):
        """ Clean up, parse the current crumb path and fill the internal
        members for functioning."""
        self._clean()
        self._check()
        self._set_argidx()

    def _clean(self):
        """ Clean up the private utility members, i.e., _argidx. """
        self._argidx = OrderedDict()

    @classmethod
    def copy(cls, crumb):
        """ Return a copy of the given `crumb`.

        Parameters
        ----------
        crumb: str or Crumb

        Returns
        -------
        copy: Crumb

        Raises
        ------
        TypeError: if `crumb` is neither an instance of `cls` nor a string.
        """
        if isinstance(crumb, cls):
            return cls(crumb._path, ignore_list=crumb._ignore)

        if isinstance(crumb, string_types):
            return cls.from_path(crumb)

        raise TypeError("Expected a Crumb or a str to copy, got {}.".format(type(crumb)))

    def _set_argidx(self):
        """ Initialize the self._argidx dict. It holds arg_name -> index.
        The index is the position in the whole `_path.split(op.sep)` where each argument is.
        """
        for position, chunk in enumerate(self._path_split()):
            if self._is_crumb_arg(chunk):
                self._argidx[self._arg_name(chunk)] = position

    def _find_arg(self, arg_name):
        """ Return the index in the current path of the crumb
        argument with name `arg_name`, or -1 if there is no such argument.
        """
        return self._argidx.get(arg_name, -1)

    def isabs(self):
        """ Return True if the current crumb path has an
        absolute path, False otherwise.
        The check is `op.isabs` applied to the part of the path that comes
        before the first argument start symbol.

        Raises
        ------
        ValueError: if the current crumb path is not valid.
        """
        if not self.is_valid(self._path):
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))

        start_sym, _ = self._start_end_syms
        prefix = self._path.split(start_sym)[0]
        return op.isabs(prefix)

    def abspath(self, first_is_basedir=False):
        """ Return a copy of `self` with an absolute crumb path.
        Add as prefix the absolute path to the current directory if the current
        crumb is not absolute.

        Parameters
        ----------
        first_is_basedir: bool
            If True and the current crumb path starts with a crumb argument,
            the first argument will be replaced by the absolute path to the current dir,
            otherwise the absolute path to the current dir will be added as a prefix.

        Returns
        -------
        abs_crumb: Crumb

        Raises
        ------
        ValueError: if the current crumb path is not valid.
        """
        if not self.is_valid(self._path):
            raise ValueError("The given crumb path has errors, got {}.".format(self.path))

        if self.isabs():
            return deepcopy(self)

        # keep the ignore list, as the `deepcopy` branch above does
        return type(self)(self._abspath(first_is_basedir=first_is_basedir),
                          ignore_list=self._ignore)

    def _path_split(self):
        """ Return the current crumb path split by `op.sep`."""
        return self._path.split(op.sep)

    def _abspath(self, first_is_basedir=False):
        """ Return the absolute path of the current crumb path.

        Parameters
        ----------
        first_is_basedir: bool
            If True and the current crumb path starts with a crumb argument,
            the first argument will be replaced by the absolute path to the current dir,
            otherwise the absolute path to the current dir will be added as a prefix.

        Returns
        -------
        abspath: str
        """
        if not self.has_crumbs(self._path):
            return op.abspath(self._path)

        # NOTE(review): the current directory is only prefixed when the first
        # path component is a crumb argument. `_arg_values` relies on the
        # result keeping the same component indices as `_argidx` in the other
        # cases, so this logic is intentionally left untouched.
        splt = self._path_split()
        path = []
        if self._is_crumb_arg(splt[0]):
            path.append(op.abspath(op.curdir))

        if not first_is_basedir:
            path.append(splt[0])

        if splt[1:]:
            path.extend(splt[1:])

        return op.sep.join(path)

    def split(self):
        """ Return a list of sub-strings of the current crumb path where the
        path parts are separated from the crumb arguments.

        Returns
        -------
        crumbs: list of str
        """
        return self._split(self._path)

    @classmethod
    def from_path(cls, crumb_path):
        """ Create an instance of Crumb out of `crumb_path`.

        Parameters
        ----------
        crumb_path: str or Crumb or pathlib.Path

        Returns
        -------
        path: Crumb

        Raises
        ------
        TypeError: if `crumb_path` is none of the accepted types.
        """
        if isinstance(crumb_path, cls):
            return cls.copy(crumb_path)

        # `copy` only accepts Crumb or str, so a Path must be converted here.
        if isinstance(crumb_path, Path):
            return cls(str(crumb_path))

        if isinstance(crumb_path, string_types):
            return cls(crumb_path)

        raise TypeError("Expected a `val` to be a `str`, got {}.".format(type(crumb_path)))

    def _lastarg(self):
        """ Return the name and idx of the last argument, or None if the
        crumb path has no arguments."""
        items = list(self._argidx.items())
        return items[-1] if items else None

    def _firstarg(self):
        """ Return the name and idx of the first argument, or None if the
        crumb path has no arguments."""
        items = list(self._argidx.items())
        return items[0] if items else None

    def _is_firstarg(self, arg_name):
        """ Return True if `arg_name` is the first argument."""
        # Take into account that self._argidx is OrderedDict
        first = self._firstarg()
        return first is not None and arg_name == first[0]

    def _arg_values(self, arg_name, arg_values=None):
        """ Return the existing values in the file system for the crumb argument
        with name `arg_name`.
        The `arg_values` must be a sequence with the tuples with valid values of the dependent
        (previous in the path) crumb arguments.
        The format of `arg_values` work in such a way that `self._path.format(dict(arg_values[0]))`
        would give me a valid path or crumb.

        Parameters
        ----------
        arg_name: str

        arg_values: list of lists of 2-tuples

        Returns
        -------
        vals: list of lists of 2-tuples

        Raises
        ------
        ValueError: if `arg_values` is None and `arg_name` is not the
        first crumb argument in self._path

        IOError: if this crosses to any path that is non-existing.
        """
        if arg_values is None and not self._is_firstarg(arg_name):
            raise ValueError("Cannot get the list of values for {} if"
                             " the previous arguments are not filled"
                             " in `paths`.".format(arg_name))

        aidx = self._find_arg(arg_name)

        # check if the path is absolute, do it absolute
        apath = self._abspath()
        splt = apath.split(op.sep)

        # the last path component may be a file, any other must be a folder
        just_dirs = aidx != len(splt) - 1

        if arg_values is None:
            base = op.sep.join(splt[:aidx])
            return [[(arg_name, val)]
                    for val in list_children(base, just_dirs=just_dirs, ignore=self._ignore)]

        vals = []
        for aval in arg_values:
            # create the part of the crumb path that is already specified
            path = self._split(self._replace(self._path, **dict(aval)))[0]

            # list the children of `path`
            subpaths = list_children(path, just_dirs=just_dirs, ignore=self._ignore)

            # extend `val` tuples with the new list of values for `aval`
            vals.extend(aval + [(arg_name, sp)] for sp in subpaths)

        return vals

    def replace(self, **kwargs):
        """ Return a copy of self with the crumb arguments in
        `kwargs` replaced by its values.

        Parameters
        ----------
        kwargs: strings

        Returns
        -------
        crumb: Crumb
            A new crumb with the same ignore list as `self`.

        Raises
        ------
        KeyError: if any name in `kwargs` is not a crumb argument of `self`.
        """
        for arg_name in kwargs:
            if arg_name not in self._argidx:
                raise KeyError("Expected `arg_name` to be one of ({}),"
                               " got {}.".format(list(self._argidx), arg_name))

        new_path = self._replace(self._path, **kwargs)
        return type(self)(new_path, ignore_list=self._ignore)

    def _arg_deps(self, arg_name):
        """ Return a subdict of `self._argidx` with the crumb arguments
        that come before `arg_name` in the crumb path, and `arg_name` itself.
        The result is empty if `arg_name` is not an argument of the crumb path.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        arg_deps: Mapping[str, int]
        """
        argidx = self._find_arg(arg_name)
        return OrderedDict([(arg, idx) for arg, idx in self._argidx.items() if idx <= argidx])

    def values_map(self, arg_name, check_exists=False):
        """ Return a list of tuples of crumb arguments with their values.

        Parameters
        ----------
        arg_name: str

        check_exists: bool
            If True, only the combinations of values whose resulting crumb
            `exists()` are returned.

        Returns
        -------
        values_map: list of lists of 2-tuples
        """
        # unfold the arguments in path order, each step feeds the next one
        mapping = None
        for arg in self._arg_deps(arg_name):
            mapping = self._arg_values(arg, mapping)

        if not check_exists:
            return mapping

        crumbs = [self.from_path(path) for path in self._build_paths(mapping)]
        return [args for args, crumb in zip(mapping, crumbs) if crumb.exists()]

    def _build_paths(self, values_map):
        """ Return a list of paths from each tuple of args from `values_map`

        Parameters
        ----------
        values_map: list of sequences of 2-tuple

        Returns
        -------
        paths: list of str
        """
        return [self._replace(self._path, **dict(val)) for val in values_map]

    def ls(self, arg_name, fullpath=True, make_crumbs=True, check_exists=False):
        """
        Return the list of values for the argument crumb `arg_name`.
        This will also unfold any other argument crumb that appears before in the
        path.

        Parameters
        ----------
        arg_name: str
            Name of the argument crumb to be unfolded.

        fullpath: bool
            If True will build the full path of the crumb path, will also append
            the rest of crumbs not unfolded.
            If False will only return the values for the argument with name
            `arg_name`.

        make_crumbs: bool
            If `fullpath` and `make_crumbs` is True will create a Crumb for
            each element of the result.

        check_exists: bool
            If True will return only str, Crumb or Path if it exists
            in the file path, otherwise it may create file paths
            that don't have to exist.

        Returns
        -------
        values: list of str or Crumb
            Sorted when `fullpath` is True.

        Raises
        ------
        ValueError: if `arg_name` is not an argument of the crumb path, or if
        `make_crumbs` is True and `fullpath` is False.

        NotImplementedError: if the crumb path starts with an argument.

        Examples
        --------
        >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
        >>> user_folders = cr.ls('user_folder',fullpath=True,make_crumbs=True)
        """
        if arg_name not in self._argidx:
            raise ValueError("Expected `arg_name` to be one of ({}),"
                             " got {}.".format(list(self._argidx), arg_name))

        start_sym, _ = self._start_end_syms

        # if the first chunk of the path is a parameter, I am not interested in this (for now)
        if self._path.startswith(start_sym):
            raise NotImplementedError("Can't list paths that starts"
                                      " with an argument.")

        if make_crumbs and not fullpath:
            raise ValueError("`make_crumbs` can only work if `fullpath` is also True.")

        mapping = self.values_map(arg_name, check_exists=check_exists)

        if not fullpath:
            return [dict(val)[arg_name] for val in mapping]

        paths = sorted(self._build_paths(mapping))
        if make_crumbs:
            paths = [self.from_path(path) for path in paths]

        return paths

    def _remaining_deps(self, arg_names):
        """ Return the name of the arguments that are dependencies of `arg_names`.
        These are the arguments not in `arg_names` that appear in the path
        before the last argument that is in `arg_names`, listed from the
        end of the path to its start.

        Parameters
        ----------
        arg_names: Sequence[str]

        Returns
        -------
        rem_deps: Sequence[str]
        """
        started = False
        rem_deps = []
        for name in reversed(list(self._argidx)):  # take into account that argidx is ordered
            if name in arg_names:
                started = True
            elif started:
                rem_deps.append(name)

        return rem_deps

    def touch(self):
        """ Create a leaf directory and all intermediate ones
        using the non crumbed part of the current crumb path.
        The work is delegated to the `_touch` helper with its default options.

        Returns
        -------
        nupath: str
            The new path created.
        """
        return self._touch(self._path)

    def joinpath(self, suffix):
        """ Return a copy of the current crumb with the `suffix` path appended.
        If suffix has crumb arguments, the whole crumb will be updated.

        Parameters
        ----------
        suffix: str

        Returns
        -------
        cr: Crumb
            A new crumb with the same ignore list as `self`.
        """
        return type(self)(op.join(self._path, suffix), ignore_list=self._ignore)

    def exists(self):
        """ Return True if the current crumb path is a possibly existing path,
        False otherwise.

        Returns
        -------
        exists: bool
        """
        if not self.has_crumbs(self._path):
            return op.exists(str(self)) or op.islink(str(self))

        if not op.exists(self.split()[0]):
            return False

        last, _ = self._lastarg()
        paths = self.ls(last, fullpath=True, make_crumbs=False, check_exists=False)

        return all(self._split_exists(lp) for lp in paths)

    def has_files(self):
        """ Return True if the current crumb path has any file in its
        possible paths.

        Returns
        -------
        has_files: bool
        """
        if not op.exists(self.split()[0]):
            return False

        last = self._lastarg()
        if last is None:
            # no arguments to unfold: the only possible path is the path itself
            return op.isfile(self._path)

        paths = self.ls(last[0], fullpath=True, make_crumbs=True, check_exists=True)

        return any(op.isfile(str(lp)) for lp in paths)

    def unfold(self):
        """ Return a list of all the existing paths until the last crumb argument.
        If the crumb path has no arguments, the list holds a copy of `self`
        if it exists, and is empty otherwise.

        Returns
        -------
        paths: list of Crumb
        """
        last = self._lastarg()
        if last is None:
            return [self.copy(self)] if self.exists() else []

        return self.ls(last[0], fullpath=True, make_crumbs=True, check_exists=True)

    def __getitem__(self, arg_name):
        """ Return the existing values of the crumb argument `arg_name`
        without removing duplicates.

        Parameters
        ----------
        arg_name: str

        Returns
        -------
        values: list of str
        """
        return self.ls(arg_name, fullpath=False, make_crumbs=False, check_exists=True)

    def __setitem__(self, key, value):
        """ Replace in place the crumb argument `key` by `value`.

        Raises
        ------
        KeyError: if `key` is not a crumb argument of `self`.
        """
        if key not in self._argidx:
            raise KeyError("Expected `arg_name` to be one of ({}),"
                           " got {}.".format(list(self._argidx), key))

        self._path = self._replace(self._path, **{key: value})
        self._update()

    # The ordering operators compare the crumb path with `str(other)`.
    def __ge__(self, other):
        return self._path >= str(other)

    def __le__(self, other):
        return self._path <= str(other)

    def __gt__(self, other):
        return self._path > str(other)

    def __lt__(self, other):
        return self._path < str(other)

    def __hash__(self):
        return hash(self._path)

    def __contains__(self, item):
        """ Return True if `item` is the name of a crumb argument of `self`."""
        return item in self._argidx

    def __repr__(self):
        # `type(self)` instead of the bare `__class__` cell: works in Python 2
        # as well and reports the right name for subclasses.
        return '{}("{}")'.format(type(self).__name__, self._path)

    def __str__(self):
        return str(self._path)

    def __eq__(self, other):
        """ Return True if `self` and `other` are equal, False otherwise.
        Two crumbs are equal if they have the same path, arguments and
        ignore list.

        Parameters
        ----------
        other: Crumb

        Returns
        -------
        is_equal: bool
            `NotImplemented` if `other` is not a Crumb, so Python can fall
            back to its default comparison instead of raising.
        """
        if not isinstance(other, Crumb):
            return NotImplemented

        return (self._path == other._path and
                self._argidx == other._argidx and
                self._ignore == other._ignore)
||
593 | |||
594 |