Conditions | 24 |
Total Lines | 90 |
Code Lines | 48 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method
If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
Complex code such as the method pocketutils.tools.path_tools.PathUtils.sanitize_node() often does a lot of different things. To break its enclosing class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to pocketutils |
||
173 | def sanitize_node( |
||
174 | self: Self, |
||
175 | bit: PurePath | str, |
||
176 | *, |
||
177 | is_file: bool | None = None, |
||
178 | is_root_or_drive: bool | None = None, |
||
179 | fat: bool = False, |
||
180 | trim: bool = False, |
||
181 | ) -> str: |
||
182 | r""" |
||
183 | Sanitizes a path node such that it will be fine for major OSes and filesystems. |
||
184 | For example: |
||
185 | - 'plums;and/or;apples' becomes 'plums_and_or_apples' (escaped ; and /) |
||
186 | - 'null.txt' becomes '_null_.txt' ('null' is forbidden in Windows) |
||
187 | - 'abc ' becomes 'abc' (no trailing spaces) |
||
188 | |||
189 | The behavior is platform-independent -- os, sys, and pathlib are not used. |
||
190 | For ex, calling sanitize_path_node(r'C:\') returns r'C:\' on both Windows and Linux |
||
191 | If you want to sanitize a whole path, see sanitize_path instead. |
||
192 | |||
193 | Args: |
||
194 | bit: The node |
||
195 | is_file: False for directories, True otherwise, None if unknown |
||
196 | is_root_or_drive: True if known to be the root ('/') or a drive ('C:\'), None if unknown |
||
197 | fat: Also make compatible with FAT filesystems |
||
198 | trim: Truncate to 254 chars (otherwise fails) |
||
199 | """ |
||
200 | # since is_file and is_root_or_drive are both Optional[bool], let's be explicit and use 'is' for clarity |
||
201 | if is_file is True and is_root_or_drive is True: |
||
202 | msg = "is_file and is_root_or_drive are both true" |
||
203 | raise ValueIllegalError(msg) |
||
204 | if is_file is True and is_root_or_drive is None: |
||
205 | is_root_or_drive = False |
||
206 | if is_root_or_drive is True and is_file is None: |
||
207 | is_file = False |
||
208 | source_bit = copy(str(bit)) |
||
209 | bit = str(bit).strip() |
||
210 | # first, catch root or drive as long as is_root_or_drive is not false |
||
211 | # if is_root_or_drive is True (which is a weird call), then fail if it's not |
||
212 | # otherwise, it's not a root or drive letter, so keep going |
||
213 | if is_root_or_drive is not False: |
||
214 | # \ is allowed in Windows |
||
215 | if bit in ["/", "\\"]: |
||
216 | return bit |
||
217 | m = re.compile(r"^([A-Z]:)(?:\\)?$").fullmatch(bit) |
||
218 | # this is interesting |
||
219 | # for bit=='C:' and is_root_or_drive=None, |
||
220 | # it could be either a drive letter |
||
221 | # or a file path that should be corrected to 'C_' |
||
222 | # I guess here we're going with a drive letter |
||
223 | if m is not None: |
||
224 | # we need C:\ and not C: because: |
||
225 | # Path('C:\\', '5').is_absolute() is True |
||
226 | # but Path('C:', '5').is_absolute() is False |
||
227 | # unfortunately, doing Path('C:\\', '5') on Linux gives 'C:\\/5' |
||
228 | # I can't handle that here, but sanitize_path() will account for it |
||
229 | return m.group(1) + "\\" |
||
230 | if is_root_or_drive is True: |
||
231 | msg = f"Node '{bit}' is not the root or a drive letter" |
||
232 | raise ValueIllegalError(msg, value=bit) |
||
233 | # just dots is invalid |
||
234 | if set(bit.replace(" ", "")) == "." and bit not in ["..", "."]: |
||
235 | bit = "_" + bit + "_" |
||
236 | # raise IllegalPathError(f"Node '{source_bit}' is invalid") |
||
237 | for q in _bad_chars: |
||
238 | bit = bit.replace(q, "_") |
||
239 | bad_strs = _bad_strs_fat if fat else _bad_strs |
||
240 | if bit.upper() in bad_strs: |
||
241 | # arbitrary decision |
||
242 | bit = "_" + bit + "_" |
||
243 | else: |
||
244 | stub, ext = os.path.splitext(bit) |
||
245 | if stub.upper() in bad_strs: |
||
246 | bit = "_" + stub + "_" + ext |
||
247 | if bit.strip() == "": |
||
248 | bit = "_" + bit + "_" |
||
249 | # raise IllegalPathError(f"Node '{source_bit}' is empty or contains only whitespace") |
||
250 | # "." cannot end a node |
||
251 | bit = bit.rstrip() |
||
252 | if is_file is not True and (bit == "." or bit == ".."): |
||
253 | return bit |
||
254 | # never allow '.' or ' ' to end a filename |
||
255 | bit = bit.rstrip(". ") |
||
256 | # do this after |
||
257 | if len(bit) > 254 and trim: |
||
258 | bit = bit[:254] |
||
259 | elif len(bit) > 254: |
||
260 | msg = f"Node '{source_bit}' has more than 254 characters" |
||
261 | raise ValueIllegalError(msg, value=bit) |
||
262 | return bit |
||
263 | |||
266 |