| Conditions | 24 |
| Total Lines | 90 |
| Code Lines | 48 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point for a good name for that new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
Complex classes, such as the one containing pocketutils.tools.path_tools.PathUtils.sanitize_node(), often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to pocketutils |
||
def sanitize_node(
    self: Self,
    bit: PurePath | str,
    *,
    is_file: bool | None = None,
    is_root_or_drive: bool | None = None,
    fat: bool = False,
    trim: bool = False,
) -> str:
    r"""
    Sanitizes a path node such that it will be fine for major OSes and filesystems.
    For example:
    - 'plums;and/or;apples' becomes 'plums_and_or_apples' (escaped ; and /)
    - 'null.txt' becomes '_null_.txt' ('null' is forbidden in Windows)
    - 'abc  ' becomes 'abc' (no trailing spaces)
    - '...' becomes '_..._' (a node made only of dots is invalid)

    The behavior is platform-independent: the current OS is never consulted.
    For ex, calling sanitize_node(r'C:\') returns r'C:\' on both Windows and Linux
    If you want to sanitize a whole path, see sanitize_path instead.

    Args:
        bit: The node
        is_file: False for directories, True otherwise, None if unknown
        is_root_or_drive: True if known to be the root ('/') or a drive ('C:\'), None if unknown
        fat: Also make compatible with FAT filesystems
        trim: Truncate to 254 chars (otherwise fails)

    Returns:
        The sanitized node. A root ('/' or '\') is returned unchanged, and a drive
        letter is returned with a trailing backslash (e.g. 'C:' becomes 'C:\').

    Raises:
        ValueIllegalError: If ``is_file`` and ``is_root_or_drive`` are both True;
            if ``is_root_or_drive`` is True but the node is neither a root nor a drive;
            or if the result is longer than 254 characters and ``trim`` is False.
    """
    # is_file and is_root_or_drive are tri-state (True / False / None),
    # so compare with 'is' rather than relying on truthiness
    if is_file is True and is_root_or_drive is True:
        msg = "is_file and is_root_or_drive are both true"
        raise ValueIllegalError(msg)
    # knowing one flag is True pins the other one to False
    if is_file is True and is_root_or_drive is None:
        is_root_or_drive = False
    if is_root_or_drive is True and is_file is None:
        is_file = False
    # keep the untouched input for error messages (str is immutable; no copy needed)
    source_bit = str(bit)
    bit = source_bit.strip()
    # first, catch root or drive as long as is_root_or_drive is not false
    # if is_root_or_drive is True (which is a weird call), then fail if it's not
    # otherwise, it's not a root or drive letter, so keep going
    if is_root_or_drive is not False:
        # \ is allowed in Windows
        if bit in ("/", "\\"):
            return bit
        m = re.fullmatch(r"^([A-Z]:)(?:\\)?$", bit)
        # this is interesting
        # for bit=='C:' and is_root_or_drive=None,
        # it could be either a drive letter
        # or a file path that should be corrected to 'C_'
        # I guess here we're going with a drive letter
        if m is not None:
            # we need C:\ and not C: because:
            # Path('C:\\', '5').is_absolute() is True
            # but Path('C:', '5').is_absolute() is False
            # unfortunately, doing Path('C:\\', '5') on Linux gives 'C:\\/5'
            # I can't handle that here, but sanitize_path() will account for it
            return m.group(1) + "\\"
        if is_root_or_drive is True:
            msg = f"Node '{bit}' is not the root or a drive letter"
            raise ValueIllegalError(msg, value=bit)
    # just dots is invalid ('.' and '..' are handled further down)
    # NOTE: the comparison must be against the set {"."}; a set never equals the str "."
    if set(bit.replace(" ", "")) == {"."} and bit not in ("..", "."):
        # wrap instead of rejecting, so the node survives the trailing-dot strip below
        bit = "_" + bit + "_"
    for q in _bad_chars:
        bit = bit.replace(q, "_")
    bad_strs = _bad_strs_fat if fat else _bad_strs
    if bit.upper() in bad_strs:
        # arbitrary decision
        bit = "_" + bit + "_"
    else:
        # reserved names are still reserved when followed by an extension
        stub, ext = os.path.splitext(bit)
        if stub.upper() in bad_strs:
            bit = "_" + stub + "_" + ext
    # empty or whitespace-only nodes are wrapped rather than rejected
    if bit.strip() == "":
        bit = "_" + bit + "_"
    bit = bit.rstrip()
    # '.' and '..' are legitimate directory references unless this is known to be a file
    if is_file is not True and bit in (".", ".."):
        return bit
    # never allow '.' or ' ' to end a filename
    bit = bit.rstrip(". ")
    # check the length last, once every substitution has been applied
    if len(bit) > 254 and trim:
        bit = bit[:254]
    elif len(bit) > 254:
        msg = f"Node '{source_bit}' has more than 254 characters"
        raise ValueIllegalError(msg, value=bit)
    return bit
||
| 263 | |||
| 266 |