| Total Complexity | 123 |
| Total Lines | 756 |
| Duplicated Lines | 0 % |
Complex classes like hansel.Crumb often do many different things. To break such a class down, we need to identify a cohesive component within it. A common way to find such a component is to look for fields and methods that share the same prefix or suffix.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
class Crumb(object):
    """ The crumb path model class.

    Parameters
    ----------
    crumb_path: str
        A file or folder path with crumb arguments. See Examples.

    ignore_list: sequence of str
        A list of `fnmatch` patterns of filenames to be ignored.

    regex: str
        Choices: 'fnmatch', 're' or 're.ignorecase'
        If 'fnmatch' will use fnmatch regular expressions to
        match any expression you may have in a crumb argument.
        If 're' will use re.match.
        If 're.ignorecase' will use re.match and pass re.IGNORECASE to re.compile.

    Examples
    --------
    >>> crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
    """
    # symbols indicating start and end of a crumb argument
    _start_end_syms = ('{', '}')
    # symbol handed to `_arg_params` as `reg_sym`
    # (presumably separates an argument name from its pattern -- TODO confirm in _utils)
    _regex_sym = ':'

    # specify partial functions from _utils with _arg_start_sym and _arg_end_sym
    # everything would be much simpler if I hardcoded these symbols but I still
    # feel that this flexibility is nice to have.
    # NOTE: the methods of this class call these through `self` passing only the
    # path or path piece (e.g. `self.is_valid(self._path)`); this relies on
    # `partial` objects not binding `self` (assuming `partial` is functools.partial).
    _is_crumb_arg = partial(_is_crumb_arg, start_end_syms=_start_end_syms)
    _arg_params = partial(_arg_params, start_end_syms=_start_end_syms, reg_sym=_regex_sym)
    is_valid = partial(is_valid, start_end_syms=_start_end_syms)
    has_crumbs = partial(has_crumbs, start_end_syms=_start_end_syms)
    _split = partial(_split, start_end_syms=_start_end_syms)
    _touch = partial(_touch, start_end_syms=_start_end_syms)
    _split_exists = partial(_split_exists, start_end_syms=_start_end_syms)
||
| 74 | |||
def __init__(self, crumb_path, ignore_list=None, regex='fnmatch'):
    """ Build a Crumb from `crumb_path` and parse its crumb arguments.

    Parameters
    ----------
    crumb_path: str
        A file or folder path with crumb arguments.

    ignore_list: sequence of str
        `fnmatch` patterns of filenames to be ignored.
        An empty list is used when None is given.

    regex: str
        Choices: 'fnmatch', 're' or 're.ignorecase'.
    """
    # matching configuration; `_re_args` may be filled in by `_set_match_function`
    self._re_method = regex
    self._re_args = None
    self._ignore = [] if ignore_list is None else ignore_list

    # argument book-keeping
    self._argidx = OrderedDict()  # in which order the crumb arguments appear
    self._argval = {}  # value of each argument already set in the current path
    self.patterns = {}  # pattern set for each argument, if any. Public for the user.

    self._path = _get_path(crumb_path)
    self._update()
||
| 88 | |||
@property
def path(self):
    """The crumb path string currently held by this object."""
    return self._path

@path.setter
def path(self, value):
    """ Store a new crumb path string and re-run `_update` on the internal members.

    Parameters
    ----------
    value: str
        A file or folder path with crumb arguments. See Examples in class docstring.
    """
    self._path = value
    self._update()
||
| 104 | |||
| 105 | def _open_arg_items(self): |
||
| 106 | """ Return an iterator to the crumb _argidx items in `self` that have not been replaced yet. |
||
| 107 | In the same order as they appear in the crumb path. |
||
| 108 | |||
| 109 | Returns |
||
| 110 | ------- |
||
| 111 | crumb_args: set of str |
||
| 112 | |||
| 113 | Note |
||
| 114 | ---- |
||
| 115 | I know that there is shorter/faster ways to program this but I wanted to maintain the |
||
| 116 | order of the arguments in argidx in the result of this function. |
||
| 117 | """ |
||
| 118 | for arg_name, idx in self._argidx.items(): |
||
| 119 | if arg_name not in self._argval: |
||
| 120 | yield arg_name, idx |
||
| 121 | |||
def has_set(self, arg_name):
    """ Return False if `arg_name` is one of the open (not yet replaced)
    crumb arguments, True otherwise."""
    for open_name in self.open_args():
        if open_name == arg_name:
            return False
    return True
||
| 126 | |||
def open_args(self):
    """ Iterate over the names of the crumb arguments that have not been
    replaced yet, in the order given by `self._open_arg_items()`.

    Yields
    ------
    arg_name: str
    """
    for item in self._open_arg_items():
        yield item[0]
||
| 142 | |||
def all_args(self):
    """ Iterate over every crumb argument name stored in `self._argidx`,
    in the order kept by that mapping.

    Yields
    ------
    arg_name: str
    """
    for arg_name in self._argidx:
        yield arg_name
||
| 153 | |||
| 154 | def _check(self): |
||
| 155 | """ Raise ValueError if the path of the Crumb has errors using `self.is_valid`.""" |
||
| 156 | if not self.is_valid(self._path): |
||
| 157 | raise ValueError("The current crumb path has errors, got {}.".format(self.path)) |
||
| 158 | |||
| 159 | def _update(self): |
||
| 160 | """ Clean up, parse the current crumb path and fill the internal |
||
| 161 | members for functioning.""" |
||
| 162 | self._clean() |
||
| 163 | self._check() |
||
| 164 | self._set_argdicts() |
||
| 165 | self._set_match_function() |
||
| 166 | self._set_replace_function() |
||
| 167 | |||
def _set_replace_function(self):
    """ Bind `self._replace` to `_replace` with the start/end symbols and
    `self.patterns` (as `regexes`) already filled in."""
    fixed_kwargs = {
        'start_end_syms': self._start_end_syms,
        'regexes': self.patterns,
    }
    self._replace = partial(_replace, **fixed_kwargs)
||
| 173 | |||
| 174 | def _set_match_function(self): |
||
| 175 | """ Update self._match_filter with a regular expression |
||
| 176 | matching function depending on the value of self._re_method.""" |
||
| 177 | if self._re_method == 'fnmatch': |
||
| 178 | self._match_filter = fnmatch_filter |
||
| 179 | elif self._re_method == 're': |
||
| 180 | self._match_filter = regex_match_filter |
||
| 181 | elif self._re_method == 're.ignorecase': |
||
| 182 | self._match_filter = regex_match_filter |
||
| 183 | self._re_args = (re.IGNORECASE, ) |
||
| 184 | else: |
||
| 185 | raise ValueError('Expected regex method value to be `fnmatch`' |
||
| 186 | ' or `re`, got {}.'.format(self._re_method)) |
||
| 187 | |||
| 188 | def _clean(self): |
||
| 189 | """ Clean up the private utility members, i.e., _argidx. """ |
||
| 190 | self._argidx = OrderedDict() |
||
| 191 | |||
@classmethod
def copy(cls, crumb):
    """ Return a deep copy of the given `crumb`.

    Parameters
    ----------
    crumb: str or Crumb

    Returns
    -------
    copy: Crumb

    Raises
    ------
    TypeError
        If `crumb` is neither an instance of this class nor a string.
    """
    if isinstance(crumb, cls):
        # the ignore list is copied too, so that the new object does not
        # share any mutable state with the original one
        nucr = cls(crumb._path,
                   ignore_list=deepcopy(crumb._ignore),
                   regex=crumb._re_method)
        nucr._argidx = deepcopy(crumb._argidx)
        nucr._argval = deepcopy(crumb._argval)
        return nucr

    if isinstance(crumb, string_types):
        return cls.from_path(crumb)

    raise TypeError("Expected a Crumb or a str to copy, got {}.".format(type(crumb)))
||
| 213 | |||
| 214 | def _set_argdicts(self): |
||
| 215 | """ Initialize the self._argidx dict. It holds arg_name -> index. |
||
| 216 | The index is the position in the whole `_path.split(op.sep)` where each argument is. |
||
| 217 | """ |
||
| 218 | fs = self._path_split() |
||
| 219 | for idx, f in enumerate(fs): |
||
| 220 | if self._is_crumb_arg(f): |
||
| 221 | arg_name, arg_regex = self._arg_params(f) |
||
| 222 | self._argidx[arg_name] = idx |
||
| 223 | |||
| 224 | if arg_regex is not None: |
||
| 225 | self.patterns[arg_name] = arg_regex |
||
| 226 | |||
| 227 | def _find_arg(self, arg_name): |
||
| 228 | """ Return the index in the current path of the crumb |
||
| 229 | argument with name `arg_name`. |
||
| 230 | """ |
||
| 231 | return self._argidx.get(arg_name, -1) |
||
| 232 | |||
def isabs(self):
    """ Return True if the part of the crumb path that comes before the first
    crumb argument is an absolute path, False otherwise.

    Raises
    ------
    ValueError
        If `self.is_valid(self._path)` is falsy.
    """
    if not self.is_valid(self._path):
        raise ValueError("The given crumb path has errors, got {}.".format(self.path))

    opening, _ = self._start_end_syms
    # only the text before the first argument start symbol is checked
    prefix = self._path.partition(opening)[0]
    return op.isabs(prefix)
||
| 244 | |||
def abspath(self, first_is_basedir=False):
    """ Return a copy of `self` whose path is the result of `self._abspath`.

    If `self.isabs()` is already True a deep copy of `self` is returned.

    Parameters
    ----------
    first_is_basedir: bool
        Passed through to `self._abspath`.

    Returns
    -------
    abs_crumb: Crumb

    Raises
    ------
    ValueError
        If `self.is_valid(self._path)` is falsy.
    """
    valid = self.is_valid(self._path)
    if not valid:
        raise ValueError("The given crumb path has errors, got {}.".format(self.path))

    if not self.isabs():
        abs_crumb = self.copy(self)
        # only the path string of the copy is replaced here
        abs_crumb._path = self._abspath(first_is_basedir=first_is_basedir)
        return abs_crumb

    return deepcopy(self)
||
| 269 | |||
| 270 | def _path_split(self): |
||
| 271 | return self._path.split(op.sep) |
||
| 272 | |||
def _abspath(self, first_is_basedir=False):
    """ Return the absolute path of the current crumb path.

    Parameters
    ----------
    first_is_basedir: bool
        If True and the current crumb path starts with a crumb argument and first_is_basedir,
        the first argument will be replaced by the absolute path to the current dir,
        otherwise the absolute path to the current dir will be added as a prefix.

    Returns
    -------
    abspath: str
    """
    # a path without crumb arguments is delegated to `op.abspath`
    if not self.has_crumbs(self._path):
        return op.abspath(self._path)

    # NOTE(review): the nesting of the `if` statements below was reconstructed
    # from a flattened listing -- confirm against the original file.
    splt = self._path_split()
    path = []
    # the current directory is only prepended when the first piece is a crumb argument
    if self._is_crumb_arg(splt[0]):
        path.append(op.abspath(op.curdir))

    # keep the first piece unless the caller asked to use it as the base dir
    if not first_is_basedir:
        path.append(splt[0])

    if splt[1:]:
        path.extend(splt[1:])

    return op.sep.join(path)
||
| 301 | |||
def split(self):
    """ Return the result of `self._split` applied to the current crumb path.

    Returns
    -------
    crumbs: list of str
        Sub-strings of the crumb path, where the path parts are separated
        from the crumb arguments.
    """
    current = self._path
    return self._split(current)
||
| 311 | |||
@classmethod
def from_path(cls, crumb_path):
    """ Create an instance of Crumb out of `crumb_path`.

    Parameters
    ----------
    crumb_path: str or Crumb or pathlib.Path

    Returns
    -------
    path: Crumb

    Raises
    ------
    TypeError
        If `crumb_path` is not a string, a Crumb or a Path.
    """
    if isinstance(crumb_path, cls):
        return cls.copy(crumb_path)

    # `cls.copy` only takes Crumb or str objects, so a Path has to be
    # converted to its string form before building the new object
    if isinstance(crumb_path, Path):
        return cls(str(crumb_path))

    if isinstance(crumb_path, string_types):
        return cls(crumb_path)

    raise TypeError("Expected `crumb_path` to be a `str`, a `Crumb` or a `Path`,"
                    " got {}.".format(type(crumb_path)))
||
| 330 | |||
| 331 | def _last_open_arg(self): |
||
| 332 | """ Return the name and idx of the last open argument.""" |
||
| 333 | for arg, idx in reversed(list(self._open_arg_items())): |
||
| 334 | return arg, idx |
||
| 335 | |||
| 336 | def _first_open_arg(self): |
||
| 337 | """ Return the name and idx of the first open argument.""" |
||
| 338 | for arg, idx in self._open_arg_items(): |
||
| 339 | return arg, idx |
||
| 340 | |||
| 341 | def _is_first_open_arg(self, arg_name): |
||
| 342 | """ Return True if `arg_name` is the first open argument.""" |
||
| 343 | # Take into account that self._argidx is OrderedDict |
||
| 344 | return arg_name == self._first_open_arg()[0] |
||
| 345 | |||
| 346 | def _arg_values(self, arg_name, arg_values=None): |
||
| 347 | """ Return the existing values in the file system for the crumb argument |
||
| 348 | with name `arg_name`. |
||
| 349 | The `arg_values` must be a sequence with the tuples with valid values of the dependent |
||
| 350 | (previous in the path) crumb arguments. |
||
| 351 | The format of `arg_values` work in such a way that `self._path.format(dict(arg_values[0]))` |
||
| 352 | would give me a valid path or crumb. |
||
| 353 | Parameters |
||
| 354 | ---------- |
||
| 355 | arg_name: str |
||
| 356 | |||
| 357 | arg_values: list of tuples |
||
| 358 | |||
| 359 | Returns |
||
| 360 | ------- |
||
| 361 | vals: list of tuples |
||
| 362 | |||
| 363 | Raises |
||
| 364 | ------ |
||
| 365 | ValueError: if `arg_values` is None and `arg_name` is not the |
||
| 366 | first crumb argument in self._path |
||
| 367 | |||
| 368 | IOError: if this crosses to any path that is non-existing. |
||
| 369 | """ |
||
| 370 | if arg_values is None and not self._is_first_open_arg(arg_name): |
||
| 371 | raise ValueError("Cannot get the list of values for {} if" |
||
| 372 | " the previous arguments are not filled" |
||
| 373 | " in `paths`.".format(arg_name)) |
||
| 374 | |||
| 375 | aidx = self._find_arg(arg_name) |
||
| 376 | |||
| 377 | # check if the path is absolute, do it absolute |
||
| 378 | apath = self._abspath() |
||
| 379 | splt = apath.split(op.sep) |
||
| 380 | |||
| 381 | if aidx == len(splt) - 1: # this means we have to list files too |
||
| 382 | just_dirs = False |
||
| 383 | else: # this means we have to list folders |
||
| 384 | just_dirs = True |
||
| 385 | |||
| 386 | vals = [] |
||
| 387 | if arg_values is None: |
||
| 388 | base = op.sep.join(splt[:aidx]) |
||
| 389 | vals = list_subpaths(base, |
||
| 390 | just_dirs=just_dirs, |
||
| 391 | ignore=self._ignore, |
||
| 392 | pattern=self.patterns.get(arg_name, ''), |
||
| 393 | filter_func=self._match_filter, |
||
| 394 | filter_args=self._re_args) |
||
| 395 | |||
| 396 | vals = [[(arg_name, val)] for val in vals] |
||
| 397 | else: |
||
| 398 | for aval in arg_values: |
||
| 399 | # create the part of the crumb path that is already specified |
||
| 400 | path = self._split(self._replace(self._path, |
||
| 401 | **dict(aval)))[0] |
||
| 402 | |||
| 403 | paths = list_subpaths(path, |
||
| 404 | just_dirs=just_dirs, |
||
| 405 | ignore=self._ignore, |
||
| 406 | pattern=self.patterns.get(arg_name, ''), |
||
| 407 | filter_func=self._match_filter) |
||
| 408 | |||
| 409 | # extend `val` tuples with the new list of values for `aval` |
||
| 410 | vals.extend([aval + [(arg_name, sp)] for sp in paths]) |
||
| 411 | |||
| 412 | return vals |
||
| 413 | |||
| 414 | def _check_args(self, arg_names, self_args): |
||
| 415 | """ Raise a ValueError if `self_args` is empty. |
||
| 416 | Raise a KeyError if `arg_names` is not a subset of `self_args`. |
||
| 417 | """ |
||
| 418 | if not self_args: |
||
| 419 | raise ValueError('This Crumb has no remaining arguments: {}.'.format(self.path)) |
||
| 420 | |||
| 421 | if not set(arg_names).issubset(set(self_args)): |
||
| 422 | raise KeyError("Expected `arg_names` to be a subset of ({})," |
||
| 423 | " got {}.".format(list(self_args), arg_names)) |
||
| 424 | |||
| 425 | def _check_open_args(self, arg_names): |
||
| 426 | """ Raise a KeyError if any of the arguments in `arg_names` is not a crumb |
||
| 427 | argument name in `self.path`. |
||
| 428 | Parameters |
||
| 429 | ---------- |
||
| 430 | arg_names: sequence of str |
||
| 431 | Names of crumb arguments |
||
| 432 | |||
| 433 | Raises |
||
| 434 | ------ |
||
| 435 | KeyError |
||
| 436 | """ |
||
| 437 | return self._check_args(arg_names, self_args=self.open_args()) |
||
| 438 | |||
| 439 | def _update_argidx(self, **kwargs): |
||
| 440 | """ Update the argument index `self._argidx` dictionary taking into account the replacement number of splits.""" |
||
| 441 | for arg_name, value in kwargs.items(): |
||
| 442 | n_splits = len(value.split(op.sep)) |
||
| 443 | |||
| 444 | if n_splits < 1: |
||
| 445 | raise ValueError('Error reading your replacement value "{}" for ' |
||
| 446 | 'crumb argument "{}".'.format(value, arg_name)) |
||
| 447 | elif n_splits == 1: |
||
| 448 | continue |
||
| 449 | |||
| 450 | # n_splits > 1, so I have to update the position of the argument children |
||
| 451 | childs = self._arg_children(arg_name) |
||
| 452 | for child_name in childs: |
||
| 453 | self._argidx[child_name] = self._argidx[child_name] + n_splits - 1 |
||
| 454 | |||
def set_args(self, **kwargs):
    """ Replace in the path the crumb arguments given in `kwargs` by their
    values and update the internal members accordingly.

    The arguments that already have a value in `self._argval` are ignored.

    Parameters
    ----------
    kwargs: strings

    Returns
    -------
    crumb: Crumb
        `self`, already modified.
    """
    self._check_args(kwargs.keys(), self_args=self.all_args())

    # ignore for now the arguments that are in argval.
    # TODO: never change `_path`, make the `path` property to build up the path on runtime checking argval.
    kwargs = dict((name, value) for name, value in kwargs.items()
                  if name not in self._argval)

    self._path = self._replace(self._path, **kwargs)
    self._check()

    self._update_argidx(**kwargs)
    _dict_popitems(self.patterns, **kwargs)
    self._argval.update(**kwargs)
    return self
||
| 481 | |||
def replace(self, **kwargs):
    """ Return a copy of self where the crumb arguments in `kwargs` have been
    set to the given values. `self` is left untouched.

    As an analogy to the `str.format` function this function could be called `format`.

    Parameters
    ----------
    kwargs: strings

    Returns
    -------
    crumb: Crumb
    """
    return self.copy(self).set_args(**kwargs)
||
| 496 | |||
| 497 | def _arg_parents(self, arg_name): |
||
| 498 | """ Return a subdict with the open arguments name and index in `self._argidx` |
||
| 499 | that come before `arg_name` in the crumb path. Include `arg_name` himself. |
||
| 500 | Parameters |
||
| 501 | ---------- |
||
| 502 | arg_name: str |
||
| 503 | |||
| 504 | Returns |
||
| 505 | ------- |
||
| 506 | arg_deps: Mapping[str, int] |
||
| 507 | """ |
||
| 508 | argidx = self._find_arg(arg_name) |
||
| 509 | return OrderedDict([(arg, idx) for arg, idx in self._open_arg_items() if idx <= argidx]) |
||
| 510 | |||
| 511 | def _arg_children(self, arg_name): |
||
| 512 | """ Return a subdict with the open arguments name and index in `self._argidx` |
||
| 513 | that come AFTER `arg_name` in the crumb path. |
||
| 514 | Parameters |
||
| 515 | ---------- |
||
| 516 | arg_name: str |
||
| 517 | |||
| 518 | Returns |
||
| 519 | ------- |
||
| 520 | arg_deps: Mapping[str, int] |
||
| 521 | """ |
||
| 522 | argidx = self._find_arg(arg_name) |
||
| 523 | return OrderedDict([(arg, idx) for arg, idx in self._open_arg_items() if idx > argidx]) |
||
| 524 | |||
| 525 | def _args_open_parents(self, arg_names): |
||
| 526 | """ Return the name of the arguments that are dependencies of `arg_names`. |
||
| 527 | Parameters |
||
| 528 | ---------- |
||
| 529 | arg_names: Sequence[str] |
||
| 530 | |||
| 531 | Returns |
||
| 532 | ------- |
||
| 533 | rem_deps: Sequence[str] |
||
| 534 | """ |
||
| 535 | started = False |
||
| 536 | arg_dads = [] |
||
| 537 | for an in reversed(list(self.open_args())): # take into account that argidx is ordered |
||
| 538 | if an in arg_names: |
||
| 539 | started = True |
||
| 540 | else: |
||
| 541 | if started: |
||
| 542 | arg_dads.append(an) |
||
| 543 | |||
| 544 | return list(reversed(arg_dads)) |
||
| 545 | |||
def values_map(self, arg_name, check_exists=False):
    """ Return a list of lists of tuples of crumb arguments with their values,
    for `arg_name` and the open arguments that come before it.

    Parameters
    ----------
    arg_name: str

    check_exists: bool
        If True, keep only the records for which the crumb built with
        `self.build_paths` says that it `exists()`.

    Returns
    -------
    values_map: list of lists of 2-tuples
        I call values_map what is called `record` in pandas. It is a list of lists of 2-tuples, where each 2-tuple
        has the shape (arg_name, arg_value).
    """
    records = None
    for dependency in self._arg_parents(arg_name):
        # each step extends the records found for the previous arguments
        records = self._arg_values(dependency, records)

    if not check_exists:
        return records

    crumbs = list(self.build_paths(records, make_crumbs=True))
    return [record for record, crumb in zip(records, crumbs) if crumb.exists()]
||
| 572 | |||
def build_paths(self, values_map, make_crumbs=True):
    """ Return a list with one path for each record of args in `values_map`.

    Parameters
    ----------
    values_map: list of sequences of 2-tuple
        Example: [[('subject_id', 'haensel'), ('candy', 'lollipop.png')],
                  [('subject_id', 'gretel'),  ('candy', 'jujube.png')],
                 ]

    make_crumbs: bool
        If True each element of the result is made with `self.replace`,
        otherwise with `self._replace` on the current path string.
        Default: True.

    Returns
    -------
    paths: list of str or list of Crumb
    """
    if not make_crumbs:
        return [self._replace(self._path, **dict(record)) for record in values_map]

    return [self.replace(**dict(record)) for record in values_map]
||
| 595 | |||
def ls(self, arg_name, fullpath=True, make_crumbs=True, check_exists=False):
    """ Return the sorted list of values for the argument crumb `arg_name`.

    This will also unfold any other argument crumb that appears before in the
    path.

    Parameters
    ----------
    arg_name: str
        Name of the argument crumb to be unfolded.

    fullpath: bool
        If True will build the full path of the crumb path, will also append
        the rest of crumbs not unfolded.
        If False will only return the values for the argument with name
        `arg_name`.

    make_crumbs: bool
        If `fullpath` and `make_crumbs` is True will create a Crumb for
        each element of the result.

    check_exists: bool
        Passed through to `self.values_map`.

    Returns
    -------
    values: list of str or Crumb

    Raises
    ------
    NotImplementedError
        If the crumb path starts with a crumb argument.

    ValueError
        If `make_crumbs` is True and `fullpath` is False.

    Examples
    --------
    >>> cr = Crumb(op.join(op.expanduser('~'), '{user_folder}'))
    >>> user_folders = cr.ls('user_folder',fullpath=True,make_crumbs=True)
    """
    self._check_open_args([arg_name])

    opening, _ = self._start_end_syms

    # if the first chunk of the path is a parameter, I am not interested in this (for now)
    if self._path.startswith(opening):
        raise NotImplementedError("Cannot list paths that start with an argument. "
                                  "If this is a relative path, use the `abspath()` member function.")

    if not fullpath and make_crumbs:
        raise ValueError("`make_crumbs` can only work if `fullpath` is also True.")

    records = self.values_map(arg_name, check_exists=check_exists)

    if not fullpath:
        return sorted([dict(record)[arg_name] for record in records])

    return sorted(self.build_paths(records, make_crumbs=make_crumbs))
||
| 650 | |||
def touch(self):
    """ Call `self._touch` with the current crumb path and return its result.

    According to the previous documentation of this method, this creates a
    leaf directory and all intermediate ones using the non crumbed part of
    the crumb path.

    Returns
    -------
    nupath: str
        The new path created.
    """
    current = self._path
    return self._touch(current)
||
| 669 | |||
def joinpath(self, suffix):
    """ Return a new crumb with the `suffix` path appended to the current path.

    The new object is of the same class as `self` and is built with the same
    ignore list and regex method. If suffix has crumb arguments, they are
    parsed as part of the new crumb.

    Parameters
    ----------
    suffix: str

    Returns
    -------
    cr: Crumb
    """
    # keep the class and the matching configuration of `self`, as `copy` does
    return type(self)(op.join(self._path, suffix),
                      ignore_list=self._ignore,
                      regex=self._re_method)
||
| 682 | |||
def exists(self):
    """ Return True if the current crumb path is a possibly existing path,
    False otherwise.

    A path without crumb arguments is checked directly in the file system.
    Otherwise the paths listed for the last open argument are checked with
    `self._split_exists`.

    Returns
    -------
    exists: bool
    """
    if self.has_crumbs(self._path):
        # the part of the path before the first argument must be there
        if not op.exists(self.split()[0]):
            return False

        last, _ = self._last_open_arg()
        candidates = self.ls(last, fullpath=True, make_crumbs=False, check_exists=False)
        found = [self._split_exists(candidate) for candidate in candidates]
        return any(found)

    return op.exists(str(self)) or op.islink(str(self))
||
| 701 | |||
def has_files(self):
    """ Return True if the current crumb path has any file in its
    possible paths.

    A path without crumb arguments is checked directly: the result is True
    only if that path is a file.

    Returns
    -------
    has_files: bool
    """
    # without crumb arguments there is nothing to list (and no last open
    # argument to ask for), so check the path itself
    if not self.has_crumbs(self._path):
        return op.isfile(str(self._path))

    if not op.exists(list(self.split())[0]):
        return False

    last, _ = self._last_open_arg()
    paths = self.ls(last, fullpath=True, make_crumbs=True, check_exists=True)

    return any(op.isfile(str(lp)) for lp in paths)
||
| 716 | |||
def unfold(self):
    """ Return the result of `self.ls` for the last open crumb argument, with
    `fullpath`, `make_crumbs` and `check_exists` set to True.

    Returns
    -------
    paths: list of Crumb
    """
    last_name = self._last_open_arg()[0]
    return self.ls(last_name, fullpath=True, make_crumbs=True, check_exists=True)
||
| 724 | |||
| 725 | def __getitem__(self, arg_name): |
||
| 726 | """ Return the existing values of the crumb argument `arg_name` |
||
| 727 | without removing duplicates. |
||
| 728 | Parameters |
||
| 729 | ---------- |
||
| 730 | arg_name: str |
||
| 731 | |||
| 732 | Returns |
||
| 733 | ------- |
||
| 734 | values: list of str |
||
| 735 | """ |
||
| 736 | if arg_name in self._argval: |
||
| 737 | return [self._argval[arg_name]] |
||
| 738 | else: |
||
| 739 | return self.ls(arg_name, fullpath=False, make_crumbs=False, check_exists=True) |
||
| 740 | |||
| 741 | def __setitem__(self, key, value): |
||
| 742 | if key not in self._argidx: |
||
| 743 | raise KeyError("Expected `arg_name` to be one of ({})," |
||
| 744 | " got {}.".format(list(self.open_args()), key)) |
||
| 745 | _ = self.set_args(**{key: value}) |
||
| 746 | |||
| 747 | def __ge__(self, other): |
||
| 748 | return self._path >= str(other) |
||
| 749 | |||
| 750 | def __le__(self, other): |
||
| 751 | return self._path <= str(other) |
||
| 752 | |||
| 753 | def __gt__(self, other): |
||
| 754 | return self._path > str(other) |
||
| 755 | |||
| 756 | def __lt__(self, other): |
||
| 757 | return self._path < str(other) |
||
| 758 | |||
| 759 | def __hash__(self): |
||
| 760 | return self._path.__hash__() |
||
| 761 | |||
| 762 | def __contains__(self, arg_name): |
||
| 763 | return arg_name in self.all_args() |
||
| 764 | |||
| 765 | def __repr__(self): |
||
| 766 | return '{}("{}")'.format(type(self).__name__, self._path) |
||
| 767 | |||
| 768 | def __str__(self): |
||
| 769 | return str(self._path) |
||
| 770 | |||
| 771 | def __eq__(self, other): |
||
| 772 | """ Return True if `self` and `other` are equal, False otherwise. |
||
| 773 | Parameters |
||
| 774 | ---------- |
||
| 775 | other: Crumb |
||
| 776 | |||
| 777 | Returns |
||
| 778 | ------- |
||
| 779 | is_equal: bool |
||
| 780 | """ |
||
| 781 | if self._path != other._path: |
||
| 782 | return False |
||
| 783 | |||
| 784 | if self._argidx != other._argidx: |
||
| 785 | return False |
||
| 786 | |||
| 787 | if self._argval != other._argval: |
||
| 788 | return False |
||
| 789 | |||
| 790 | if self._ignore != other._ignore: |
||
| 791 | return False |
||
| 792 | |||
| 793 | return True |
||
| 794 |