| Conditions | 22 |
| Total Lines | 128 |
| Code Lines | 79 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex classes like pocketutils.tools.path_tools.PathTools, which contains the method sanitize_path_node(), often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | import sys |
||
@classmethod
def sanitize_path_node(
    cls,
    bit: PathLike,
    is_file: Optional[bool] = None,
    is_root_or_drive: Optional[bool] = None,
    include_fat: bool = False,
) -> str:
    r"""
    Sanitizes a single path node so that it is acceptable on major OSes and filesystems.

    For example:

    - 'plums:and/or:apples' becomes 'plums_and_or_apples' (escaped : and /)
    - 'nul.txt' becomes '_nul_.txt' ('NUL' is a reserved device name in Windows)
    - 'abc ' becomes 'abc' (no trailing spaces)

    The behavior is platform-independent: the result does not depend on the host OS
    (only the purely lexical ``os.path.splitext`` is used).
    For ex, calling sanitize_path_node(r'C:\') returns r'C:\' on both Windows and Linux.
    If you want to sanitize a whole path, see sanitize_path instead.

    Args:
        bit: The node
        is_file: False for directories, True otherwise, None if unknown
        is_root_or_drive: True if known to be the root ('/') or a drive ('C:\'), None if unknown
        include_fat: Also make compatible with FAT filesystems

    Returns:
        The sanitized node as a string

    Raises:
        ContradictoryRequestError: If ``is_file`` and ``is_root_or_drive`` are both True
        IllegalPathError: If ``is_root_or_drive`` is True but the node is neither a root nor
            a drive letter; if the node consists only of dots (other than '.' and '..');
            if it is empty or whitespace-only (before or after sanitization);
            or if it is longer than 254 characters
    """
    # since is_file and is_root_or_drive are both Optional[bool], be explicit and use 'is'
    if is_file is True and is_root_or_drive is True:
        raise ContradictoryRequestError("is_file and is_root_or_drive are both true")
    # each flag being True implies the other is False when it was left unspecified
    if is_file is True and is_root_or_drive is None:
        is_root_or_drive = False
    if is_root_or_drive is True and is_file is None:
        is_file = False
    # keep the unmodified text for error messages (str is immutable, so no copy is needed)
    source_bit = str(bit)
    bit = source_bit.strip()
    # first, catch root or drive as long as is_root_or_drive is not False
    # if is_root_or_drive is True (which is a weird call), then fail if it's not
    # otherwise, it's not a root or drive letter, so keep going
    if is_root_or_drive is not False:
        # \ is allowed in Windows
        if bit in ["/", "\\"]:
            return bit
        m = regex.compile(r"^([A-Z]:)(?:\\)?$", flags=regex.V1).fullmatch(bit)
        # for bit == 'C:' and is_root_or_drive=None, it could be either a drive letter
        # or a file name that should be corrected to 'C_'; we go with a drive letter
        if m is not None:
            # we need C:\ and not C: because:
            # Path('C:\\', '5').is_absolute() is True
            # but Path('C:', '5').is_absolute() is False
            # unfortunately, doing Path('C:\\', '5') on Linux gives 'C:\\/5'
            # that can't be handled here, but sanitize_path() will account for it
            return m.group(1) + "\\"
        if is_root_or_drive is True:
            raise IllegalPathError(f"Node '{bit}' is not the root or a drive letter")
    # note that we can't call WindowsPath.is_reserved because WindowsPath
    # can't be instantiated on non-Windows systems
    # also, these appear to be different from the ones defined there
    bad_chars = {
        "<",
        ">",
        ":",
        '"',
        "|",
        "?",
        "*",
        "\\",
        "/",
        # code points 128-160
        *{chr(c) for c in range(128, 128 + 33)},
        # ASCII control characters (this range already includes "\t")
        *{chr(c) for c in range(0, 32)},
    }
    # don't handle Long UNC paths
    # also cannot be blank or whitespace
    bad_strs = {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        "COM1",
        "COM2",
        "COM3",
        "COM4",
        "COM5",
        "COM6",
        "COM7",
        "COM8",
        "COM9",
        "LPT1",
        "LPT2",
        "LPT3",
        "LPT4",
        "LPT5",
        "LPT6",
        "LPT7",
        "LPT8",
        "LPT9",
    }
    if include_fat:
        # the $ suffixed ones are for FAT; no CLOCK$ (even with an ext) and no SCREEN$
        # sets are merged with |= (using += on a set raises TypeError)
        bad_strs |= {"$IDLE$", "CONFIG$", "KEYBD$", "SCREEN$", "CLOCK$", "LST"}
    # just dots (possibly separated by spaces) is invalid, except for '.' and '..'
    # compare against the set {"."}: a set never equals the string "."
    if set(bit.replace(" ", "")) == {"."} and bit not in ["..", "."]:
        raise IllegalPathError(f"Node '{source_bit}' is invalid")
    for q in bad_chars:
        bit = bit.replace(q, "_")
    if bit.upper() in bad_strs:
        # arbitrary decision: wrap reserved names in underscores
        bit = "_" + bit + "_"
    else:
        # reserved names are still reserved when followed by an extension
        stub, ext = os.path.splitext(bit)
        if stub.upper() in bad_strs:
            bit = "_" + stub + "_" + ext
    if bit.strip() == "":
        raise IllegalPathError(f"Node '{source_bit}' is empty or contains only whitespace")
    # do this after the replacements, which may have lengthened the node
    if len(bit) > 254:
        raise IllegalPathError(f"Node '{source_bit}' has more than 254 characters")
    bit = bit.strip()
    if is_file is not True and (bit == "." or bit == ".."):
        return bit
    # never allow '.' or ' ' to end a filename
    bit = bit.rstrip(". ")
    if bit == "":
        # e.g. '.' passed with is_file=True: nothing valid is left
        raise IllegalPathError(f"Node '{source_bit}' is invalid")
    return bit
||
| 256 | |||
| 259 |