| Conditions | 24 |
| Total Lines | 136 |
| Code Lines | 83 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex methods like pocketutils.tools.path_tools.PathTools.sanitize_node() often do a lot of different things, which is usually a sign that the enclosing class does too. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | import sys |
||
@classmethod
def sanitize_node(
    cls,
    bit: PathLike,
    *,
    is_file: Optional[bool] = None,
    is_root_or_drive: Optional[bool] = None,
    fat: bool = False,
    trim: bool = False,
) -> str:
    r"""
    Sanitizes a single path node so that it is acceptable on major OSes and filesystems.

    For example:

    - 'plums:and/or:apples' becomes 'plums_and_or_apples' (escaped : and /)
    - 'nul.txt' becomes '_nul_.txt' ('NUL' is a reserved name on Windows)
    - 'abc ' becomes 'abc' (no trailing spaces)

    The result does not depend on the platform the code runs on.
    For example, calling sanitize_node(r'C:\') returns r'C:\' on both Windows and Linux.
    If you want to sanitize a whole path, see sanitize_path instead.

    Args:
        bit: The node
        is_file: False for directories, True otherwise, None if unknown
        is_root_or_drive: True if known to be the root ('/') or a drive ('C:\'), None if unknown
        fat: Also make compatible with FAT filesystems
        trim: Truncate to 254 chars (otherwise a longer node raises an error)

    Returns:
        The sanitized node as a string

    Raises:
        ContradictoryRequestError: If both ``is_file`` and ``is_root_or_drive`` are True
        IllegalPathError: If ``is_root_or_drive`` is True but the node is neither a root nor
            a drive letter, or if the sanitized node exceeds 254 chars and ``trim`` is False
    """
    # is_file and is_root_or_drive are tri-state (True / False / None),
    # so compare with 'is' rather than relying on truthiness
    if is_file is True and is_root_or_drive is True:
        raise ContradictoryRequestError("is_file and is_root_or_drive are both true")
    if is_file is True and is_root_or_drive is None:
        is_root_or_drive = False
    if is_root_or_drive is True and is_file is None:
        is_file = False
    # keep the untouched text for error messages (str is immutable; no copy needed)
    source_bit = str(bit)
    bit = source_bit.strip()
    # first, catch root or drive as long as is_root_or_drive is not False
    # if is_root_or_drive is True (which is a weird call), then fail if it's not
    # otherwise, it's not a root or drive letter, so keep going
    if is_root_or_drive is not False:
        # \ is allowed in Windows
        if bit in ["/", "\\"]:
            return bit
        m = regex.compile(r"^([A-Z]:)(?:\\)?$", flags=regex.V1).fullmatch(bit)
        # for bit == 'C:' and is_root_or_drive is None, it could be either a drive letter
        # or a file name that should be corrected to 'C_'; we go with a drive letter
        if m is not None:
            # we need C:\ and not C: because:
            # Path('C:\\', '5').is_absolute() is True
            # but Path('C:', '5').is_absolute() is False
            # unfortunately, doing Path('C:\\', '5') on Linux gives 'C:\\/5'
            # that can't be handled here; per the original note, sanitize_path() accounts for it
            return m.group(1) + "\\"
        if is_root_or_drive is True:
            raise IllegalPathError(f"Node '{bit}' is not the root or a drive letter")
    # note that we can't call WindowsPath.is_reserved because WindowsPath
    # can't be instantiated on non-Windows systems
    # also, these appear to be different from the ones defined there
    bad_chars = {
        "<",
        ">",
        ":",
        '"',
        "|",
        "?",
        "*",
        "\\",
        "/",
        *{chr(c) for c in range(128, 128 + 33)},
        # control characters; this range already includes tab
        *{chr(c) for c in range(0, 32)},
    }
    # don't handle Long UNC paths
    # reserved device names; matched case-insensitively, with or without an extension
    bad_strs = {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        "COM1",
        "COM2",
        "COM3",
        "COM4",
        "COM5",
        "COM6",
        "COM7",
        "COM8",
        "COM9",
        "LPT1",
        "LPT2",
        "LPT3",
        "LPT4",
        "LPT5",
        "LPT6",
        "LPT7",
        "LPT8",
        "LPT9",
    }
    if fat:
        # the $-suffixed ones are for FAT (no CLOCK$ or SCREEN$, even with an ext)
        # sets are merged with |=; += is not defined for sets and raises TypeError
        bad_strs |= {"$IDLE$", "CONFIG$", "KEYBD$", "SCREEN$", "CLOCK$", "LST"}
    # a node made only of dots is invalid; '.' and '..' are fine unless it must be a file
    # (compare against the set {"."}: a set never equals the string ".")
    only_dots = set(bit.replace(" ", "")) == {"."}
    if only_dots and (is_file is True or bit not in ("..", ".")):
        bit = "_" + bit + "_"
    # replace every forbidden character with '_' in a single pass
    bit = bit.translate({ord(c): "_" for c in bad_chars})
    if bit.upper() in bad_strs:
        # arbitrary decision: wrap reserved names in underscores
        bit = "_" + bit + "_"
    else:
        # all '/' and '\' were replaced above, so this split does not depend on the
        # platform's path separator
        stub, ext = os.path.splitext(bit)
        if stub.upper() in bad_strs:
            bit = "_" + stub + "_" + ext
    # a node cannot be blank or whitespace
    if bit.strip() == "":
        bit = "_" + bit + "_"
    bit = bit.rstrip()
    if is_file is not True and (bit == "." or bit == ".."):
        return bit
    # never allow '.' or ' ' to end a filename
    bit = bit.rstrip(". ")
    # enforce the length limit last, on the fully sanitized node
    if len(bit) > 254:
        if not trim:
            raise IllegalPathError(f"Node '{source_bit}' has more than 254 characters")
        # truncation can expose a '.' or ' ' at the new end, so strip again;
        # fall back to '_' in the degenerate case where nothing is left
        bit = bit[:254].rstrip(". ") or "_"
    return bit
||
| 248 | |||
| 251 |