1
|
|
|
# SPDX-FileCopyrightText: 2020 Peter Bittner <[email protected]> |
2
|
|
|
# |
3
|
|
|
# SPDX-License-Identifier: GPL-3.0-or-later |
4
|
|
|
|
5
|
|
|
""" |
6
|
|
|
Modern, cross-platform, pure-Python pyclean implementation. |
7
|
|
|
""" |
8
|
|
|
|
9
|
|
|
import logging |
10
|
|
|
import os |
11
|
|
|
from pathlib import Path |
12
|
|
|
|
13
|
|
|
# File suffixes that the default cleanup deletes (compared with ``Path.suffix``).
BYTECODE_FILES = ['.pyc', '.pyo']
# Directory names that the default cleanup tries to remove after descending.
BYTECODE_DIRS = ['__pycache__']
# Glob patterns per debris topic, evaluated relative to every scanned
# directory. Patterns are applied in list order and directories are removed
# with a plain ``rmdir``, so the contents pattern (``name/**/*``) must come
# before the pattern of the directory itself (``name/``).
DEBRIS_TOPICS = {
    'cache': [
        '.cache/**/*',
        '.cache/',
    ],
    'coverage': [
        '.coverage',
        'coverage.json',
        'coverage.lcov',
        'coverage.xml',
        'htmlcov/**/*',
        'htmlcov/',
    ],
    'jupyter': [
        '.ipynb_checkpoints/**/*',
        '.ipynb_checkpoints/',
    ],
    'mypy': [
        '.mypy_cache/**/*',
        '.mypy_cache/',
    ],
    'package': [
        # Nested build output first, the enclosing ``build/`` afterwards.
        'build/bdist.*/**/*',
        'build/bdist.*/',
        'build/lib/**/*',
        'build/lib/',
        'build/',
        'dist/**/*',
        'dist/',
        'sdist/**/*',
        'sdist/',
        '*.egg-info/**/*',
        '*.egg-info/',
    ],
    'pytest': [
        '.pytest_cache/**/*',
        '.pytest_cache/',
        'pytestdebug.log',
    ],
    'ruff': [
        '.ruff_cache/**/*',
        '.ruff_cache/',
    ],
    'tox': [
        '.tox/**/*',
        '.tox/',
    ],
}
63
|
|
|
|
64
|
|
|
|
65
|
|
|
class CleanupRunner:
    """
    Holder for the active removal callables, the ignore list and counters.

    Attributes (all ``None`` until ``configure()`` has been called):

    - ``unlink`` / ``rmdir``: callables applied to files / directories,
      either the real removal functions or their dry-run counterparts.
    - ``ignore``: the ignore patterns taken from the command line options.
    - ``unlink_count`` / ``rmdir_count``: number of handled objects.
    - ``unlink_failed`` / ``rmdir_failed``: number of objects not removed.
    """

    def __init__(self):
        """Create an unconfigured runner with every attribute unset."""
        self.unlink = self.rmdir = None
        self.ignore = None
        self.unlink_count = self.unlink_failed = None
        self.rmdir_count = self.rmdir_failed = None

    def configure(self, args):
        """
        Set up the runner according to command line options.

        Reads ``args.dry_run`` to choose between real and print-only
        callables, stores ``args.ignore`` and resets all counters to zero.
        """
        if args.dry_run:
            self.unlink, self.rmdir = print_filename, print_dirname
        else:
            self.unlink, self.rmdir = remove_file, remove_directory
        self.ignore = args.ignore
        self.unlink_count = self.unlink_failed = 0
        self.rmdir_count = self.rmdir_failed = 0
87
|
|
|
|
88
|
|
|
|
89
|
|
|
# Logger named after this module.
log = logging.getLogger(__name__)
# Shared runner instance: pyclean() configures it, and the removal helpers
# below update its counters.
Runner = CleanupRunner()
91
|
|
|
|
92
|
|
|
|
93
|
|
|
def normalize(path_pattern: str) -> str:
    """
    Rewrite the path separators of a pattern into one canonical form.

    Every ``os.sep`` is replaced by ``os.altsep`` when the platform defines
    one. Where ``os.altsep`` is ``None`` the pattern is returned unchanged,
    because a backslash may then be a legitimate part of a file name.
    """
    canonical_sep = os.altsep if os.altsep else os.sep
    return path_pattern.replace(os.sep, canonical_sep)
101
|
|
|
|
102
|
|
|
|
103
|
|
|
def should_ignore(path: Path, ignore_patterns: list[str]) -> bool:
    """
    Check if a path should be ignored based on ignore patterns.

    Patterns can be:
    - Simple names like 'bar': matches any path whose last component is
      'bar'. Decorated spellings of a single name, such as 'bar/' or
      './bar', are treated like 'bar'.
    - Paths like 'foo/bar': matches when 'foo' and 'bar' occur as
      consecutive components anywhere in the path, which also covers
      everything inside that directory.

    Returns ``False`` when ``ignore_patterns`` is empty or ``None``.
    """
    if not ignore_patterns:
        return False

    path_parts = path.parts
    for pattern in ignore_patterns:
        pattern_parts = Path(normalize(pattern)).parts
        width = len(pattern_parts)
        if width > 1:
            # Slide a window of the pattern's length over the path; the range
            # is empty when the path is shorter than the pattern.
            last_start = len(path_parts) - width
            if any(
                path_parts[start : start + width] == pattern_parts
                for start in range(last_start + 1)
            ):
                return True
        # Compare with the parsed component in addition to the raw text, so
        # that a trailing separator or a leading './' does not silently
        # disable a simple-name pattern.
        elif path.name in (pattern, *pattern_parts):
            return True
    return False
132
|
|
|
|
133
|
|
|
|
134
|
|
|
def remove_file(fileobj):
    """
    Delete a file for real and record the outcome on ``Runner``.

    Calls ``fileobj.unlink()``. On success ``Runner.unlink_count`` is
    incremented; an ``OSError`` is logged at debug level and counted in
    ``Runner.unlink_failed`` instead of being raised.
    """
    log.debug('Deleting file: %s', fileobj)
    try:
        fileobj.unlink()
    except OSError as err:
        log.debug('File not deleted. %s', err)
        Runner.unlink_failed += 1
    else:
        Runner.unlink_count += 1
143
|
|
|
|
144
|
|
|
|
145
|
|
|
def remove_directory(dirobj):
    """
    Remove a directory for real and record the outcome on ``Runner``.

    Calls ``dirobj.rmdir()``. On success ``Runner.rmdir_count`` is
    incremented; an ``OSError`` is logged at debug level and counted in
    ``Runner.rmdir_failed`` instead of being raised.
    """
    log.debug('Removing directory: %s', dirobj)
    try:
        dirobj.rmdir()
    except OSError as err:
        log.debug('Directory not removed. %s', err)
        Runner.rmdir_failed += 1
    else:
        Runner.rmdir_count += 1
154
|
|
|
|
155
|
|
|
|
156
|
|
|
def print_filename(fileobj):
    """
    Dry-run counterpart of ``remove_file``, used with --dry-run.

    Logs the file that would be deleted at debug level and counts it in
    ``Runner.unlink_count``; the file system is not touched.
    """
    log.debug('Would delete file: %s', fileobj)
    Runner.unlink_count += 1
160
|
|
|
|
161
|
|
|
|
162
|
|
|
def print_dirname(dirobj):
    """
    Dry-run counterpart of ``remove_directory``, used with --dry-run.

    Logs the directory that would be deleted at debug level and counts it
    in ``Runner.rmdir_count``; the file system is not touched.
    """
    log.debug('Would delete directory: %s', dirobj)
    Runner.rmdir_count += 1
166
|
|
|
|
167
|
|
|
|
168
|
|
|
def pyclean(args):
    """
    Cross-platform cleaning of Python bytecode.

    Configures the shared ``Runner`` from ``args`` and then, for every entry
    of ``args.directory``: removes bytecode, removes the debris of each topic
    in ``args.debris`` and erases the free-form targets in ``args.erase``.
    Finally the totals are logged and, if no debris topic was requested, a
    hint about the --debris option is issued.
    """
    Runner.configure(args)

    for dir_path in map(Path, args.directory):
        log.info('Cleaning directory %s', dir_path)
        descend_and_clean(dir_path, BYTECODE_FILES, BYTECODE_DIRS)

        for topic in args.debris:
            remove_debris_for(topic, dir_path)

        remove_freeform_targets(dir_path, args.erase, args.yes, args.dry_run)

    outcome = 'would be removed' if args.dry_run else 'removed'
    log.info(
        'Total %d files, %d directories %s.',
        Runner.unlink_count,
        Runner.rmdir_count,
        outcome,
    )

    # Failures are only reported at debug level.
    if Runner.unlink_failed or Runner.rmdir_failed:
        modal_verb = 'would' if args.dry_run else 'could'
        log.debug(
            '%d files, %d directories %s not be removed.',
            Runner.unlink_failed,
            Runner.rmdir_failed,
            modal_verb,
        )

    # Suggest --debris option if it wasn't used
    if not args.debris:
        suggest_debris_option(args)
201
|
|
|
|
202
|
|
|
|
203
|
|
|
def descend_and_clean(directory, file_types, dir_names):
    """
    Walk a directory tree and clean up files with the given suffixes.

    Children are visited in sorted order. Files whose suffix is listed in
    ``file_types`` are handed to ``Runner.unlink``. Directories are descended
    into unless they match ``Runner.ignore``, and afterwards handed to
    ``Runner.rmdir`` when their name is listed in ``dir_names``. Anything
    that is neither a file nor a directory is only logged.
    """
    for entry in sorted(directory.iterdir()):
        if entry.is_file():
            if entry.suffix in file_types:
                Runner.unlink(entry)
            continue

        if not entry.is_dir():
            log.debug('Ignoring %s (neither a file nor a folder)', entry)
            continue

        if should_ignore(entry, Runner.ignore):
            log.debug('Skipping %s', entry)
        else:
            descend_and_clean(entry, file_types, dir_names)

        # Attempted only after the descent, when the directory may have
        # become empty.
        if entry.name in dir_names:
            Runner.rmdir(entry)
222
|
|
|
|
223
|
|
|
|
224
|
|
|
def remove_debris_for(topic, directory):
    """
    Clean up the debris of one topic below ``directory``.

    ``topic`` must be a key of ``DEBRIS_TOPICS``; its patterns are passed
    on to ``recursive_delete_debris``.
    """
    log.debug('Scanning for debris of %s ...', topic.title())
    recursive_delete_debris(directory, DEBRIS_TOPICS[topic])
232
|
|
|
|
233
|
|
|
|
234
|
|
|
def remove_freeform_targets(directory, glob_patterns, yes, dry_run=False):
    """
    Remove free-form targets using globbing.

    This is **potentially dangerous** since users can delete everything
    anywhere in their file system, including the entire project they're
    working on. For this reason, the implementation imposes the following
    (user experience-related) restrictions:

    - Deleting (directories) is not recursive, directory contents must be
      explicitly specified using globbing (e.g. ``dirname/**/*``).
    - The user is responsible for the deletion order, so that a directory
      is empty when it is attempted to be deleted.
    - A confirmation prompt for the deletion of every single file system
      object is shown (unless the ``--yes`` option is used, in addition).
    """
    ask_first = not yes
    for pattern in glob_patterns:
        log.debug('Erase file system objects matching: %s', pattern)
        delete_filesystem_objects(
            directory,
            pattern,
            prompt=ask_first,
            dry_run=dry_run,
        )
253
|
|
|
|
254
|
|
|
|
255
|
|
|
def recursive_delete_debris(directory, patterns):
    """
    Recursively delete debris matching any of the given patterns.

    All patterns are applied to ``directory`` in the given order. After
    that the function calls itself for every subdirectory that is not
    excluded by ``Runner.ignore``. Entries for which ``DirEntry.is_dir()``
    is true are treated as subdirectories.

    A directory that cannot be listed is reported with a warning and is not
    descended into; the error is not raised.
    """
    for pattern in patterns:
        delete_filesystem_objects(directory, pattern)

    try:
        # Read the listing completely inside the ``try``, so that errors
        # raised while iterating are handled as well and the scandir
        # iterator is closed before recursing.
        with os.scandir(directory) as entries:
            subdirs = [Path(entry.path) for entry in entries if entry.is_dir()]
    except OSError as err:
        log.warning('Cannot access directory %s: %s', directory, err)
        return

    for subdir in subdirs:
        if should_ignore(subdir, Runner.ignore):
            log.debug('Skipping %s', subdir)
        else:
            recursive_delete_debris(subdir, patterns)
278
|
|
|
|
279
|
|
|
|
280
|
|
|
def delete_filesystem_objects(directory, path_glob, prompt=False, dry_run=False):
    """
    Identifies all pathnames matching a specific glob pattern, and attempts
    to delete them in the proper order, optionally asking for confirmation.

    Implementation Note: The matches are sorted in *reverse order* and *all
    files* (including symlinks) are handled before any directory. This way
    the directories that are deepest down in the hierarchy are empty (for
    both files & directories) when their removal is attempted.

    A confirmation is only requested when ``prompt`` is set and ``dry_run``
    is not. Declined objects are counted as failed on ``Runner``.
    """
    matches = sorted(directory.glob(path_glob), reverse=True)
    ask = prompt and not dry_run

    def is_real_directory(candidate):
        # A symlink pointing to a directory is treated like a file.
        return candidate.is_dir() and not candidate.is_symlink()

    # Both selections are lazy: each path is classified when its turn comes.
    files = (match for match in matches if not is_real_directory(match))
    dirs = (match for match in matches if is_real_directory(match))

    for file_object in files:
        file_type = 'symlink' if file_object.is_symlink() else 'file'
        if ask and not confirm('Delete %s %s' % (file_type, file_object)):
            Runner.unlink_failed += 1
        else:
            Runner.unlink(file_object)

    for dir_object in dirs:
        if ask and not confirm('Remove empty directory %s' % dir_object):
            Runner.rmdir_failed += 1
        else:
            Runner.rmdir(dir_object)
314
|
|
|
|
315
|
|
|
|
316
|
|
|
def confirm(message):
    """
    An interactive confirmation prompt.

    Shows ``message`` followed by ``'? '`` and returns ``True`` when the
    answer is ``y`` or ``yes`` (ignoring case and surrounding whitespace),
    ``False`` for any other answer.

    Raises ``SystemExit`` with an abort message when the prompt is
    interrupted (Ctrl-C) or the input stream ends (Ctrl-D, closed stdin),
    so that no answer is ever assumed for a destructive action.
    """
    try:
        answer = input('%s? ' % message)
    except (KeyboardInterrupt, EOFError):
        msg = 'Aborted by user.'
        raise SystemExit(msg)
    return answer.strip().lower() in ('y', 'yes')
324
|
|
|
|
325
|
|
|
|
326
|
|
|
def detect_debris_in_directory(directory):
    """
    Scan a directory for debris artifacts and return a list of detected topics.

    A topic of ``DEBRIS_TOPICS`` counts as detected as soon as one of its
    patterns matches something in ``directory``. Patterns containing ``**``
    exist for recursive cleanup and are not used for detection.
    """

    def leaves_traces(patterns):
        # Stops at the first pattern that matches anything.
        shallow_patterns = (pattern for pattern in patterns if '**' not in pattern)
        return any(list(directory.glob(pattern)) for pattern in shallow_patterns)

    return [
        topic for topic, patterns in DEBRIS_TOPICS.items() if leaves_traces(patterns)
    ]
344
|
|
|
|
345
|
|
|
|
346
|
|
|
def suggest_debris_option(args):
    """
    Suggest using the --debris option when it wasn't used.

    Every existing directory of ``args.directory`` is scanned for debris.
    When topics are detected they are listed in the hint (sorted and
    separated by spaces), otherwise a general hint is logged.
    """
    found_topics = set()
    for dir_path in map(Path, args.directory):
        if dir_path.exists():
            found_topics.update(detect_debris_in_directory(dir_path))

    if not found_topics:
        # Provide general suggestion
        log.info(
            'Hint: Use --debris to also clean up build artifacts '
            'from common Python development tools.',
        )
        return

    # Provide targeted suggestion
    log.info(
        'Hint: Use --debris to also clean up build artifacts. Detected: %s',
        ' '.join(sorted(found_topics)),
    )
372
|
|
|
|