Passed
Pull Request — main (#103)
by Peter
01:12
created

pyclean.modern   F

Complexity

Total Complexity 70

Size/Duplication

Total Lines 399
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 70
eloc 230
dl 0
loc 399
rs 2.8
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A CleanupRunner.configure() 0 9 3
A CleanupRunner.__init__() 0 9 1

16 Functions

Rating   Name   Duplication   Size   Complexity  
A remove_directory() 0 9 2
A print_filename() 0 4 1
A remove_file() 0 9 2
A print_dirname() 0 4 1
A normalize() 0 8 1
B should_ignore() 0 29 8
B descend_and_clean() 0 19 7
A remove_debris_for() 0 8 1
C pyclean() 0 37 9
A confirm() 0 8 2
C delete_filesystem_objects() 0 34 10
A detect_debris_in_directory() 0 18 5
A recursive_delete_debris() 0 23 5
B remove_empty_directories() 0 23 6
A remove_freeform_targets() 0 19 2
A suggest_debris_option() 0 24 4

How to fix   Complexity   

Complexity

Complex modules like pyclean.modern often do a lot of different things. To break such a module down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for functions, fields, or methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# SPDX-FileCopyrightText: 2020 Peter Bittner <[email protected]>
2
#
3
# SPDX-License-Identifier: GPL-3.0-or-later
4
5
"""
6
Modern, cross-platform, pure-Python pyclean implementation.
7
"""
8
9
import logging
10
import os
11
from pathlib import Path
12
13
# File suffixes and directory names that identify compiled Python bytecode.
BYTECODE_FILES = ['.pyc', '.pyo']
BYTECODE_DIRS = ['__pycache__']

# Glob patterns to erase, grouped by debris topic. The patterns of a topic
# are applied in list order, so the contents of a directory ('name/**/*')
# are always listed before the directory itself ('name/').
DEBRIS_TOPICS = {
    'cache': ['.cache/**/*', '.cache/'],
    'coverage': [
        '.coverage',
        'coverage.json',
        'coverage.lcov',
        'coverage.xml',
        'htmlcov/**/*',
        'htmlcov/',
    ],
    'jupyter': ['.ipynb_checkpoints/**/*', '.ipynb_checkpoints/'],
    'mypy': ['.mypy_cache/**/*', '.mypy_cache/'],
    'package': [
        'build/bdist.*/**/*',
        'build/bdist.*/',
        'build/lib/**/*',
        'build/lib/',
        'build/',
        'dist/**/*',
        'dist/',
        'sdist/**/*',
        'sdist/',
        '*.egg-info/**/*',
        '*.egg-info/',
    ],
    'pytest': ['.pytest_cache/**/*', '.pytest_cache/', 'pytestdebug.log'],
    'ruff': ['.ruff_cache/**/*', '.ruff_cache/'],
    'tox': ['.tox/**/*', '.tox/'],
}
63
64
65
class CleanupRunner:
    """Holds the active delete callables, the ignore list and the counters."""

    def __init__(self):
        """Create an unconfigured runner; every attribute starts as ``None``."""
        # Callables applied to each file / directory selected for removal.
        self.unlink = self.rmdir = None
        # Directory patterns to skip while walking the tree.
        self.ignore = None
        # Counters of processed and failed objects; set to 0 by configure().
        self.unlink_count = self.unlink_failed = None
        self.rmdir_count = self.rmdir_failed = None

    def configure(self, args):
        """
        Apply the command line options in ``args`` and reset all counters.

        With ``args.dry_run`` set, the reporting-only callables are used
        instead of the ones that actually delete.
        """
        if args.dry_run:
            self.unlink, self.rmdir = print_filename, print_dirname
        else:
            self.unlink, self.rmdir = remove_file, remove_directory
        self.ignore = args.ignore
        self.unlink_count = self.unlink_failed = 0
        self.rmdir_count = self.rmdir_failed = 0


# Module-wide logger and the single runner instance shared by all functions.
log = logging.getLogger(__name__)
Runner = CleanupRunner()
91
92
93
def normalize(path_pattern: str) -> str:
    """
    Rewrite the platform's path separator in a pattern to a uniform one.

    Every ``os.sep`` is replaced by ``os.altsep`` when the platform defines
    one; otherwise the pattern comes back unchanged.
    """
    unified_sep = os.altsep if os.altsep else os.sep
    return unified_sep.join(path_pattern.split(os.sep))
101
102
103
def should_ignore(path: Path, ignore_patterns: list[str]) -> bool:
    """
    Check if a path should be ignored based on ignore patterns.

    Patterns can be:
    - Simple names like 'bar': matches any path whose last component has
      that name. A decorated spelling such as 'bar/' or './bar' is treated
      like 'bar'.
    - Paths like 'foo/bar': matches when the components 'foo', 'bar' appear
      consecutively anywhere in the path, which also covers everything
      below that directory.

    Returns ``False`` when ``ignore_patterns`` is empty or ``None``.
    """
    if not ignore_patterns:
        return False

    path_parts = path.parts
    for pattern in ignore_patterns:
        pattern_parts = Path(normalize(pattern)).parts
        width = len(pattern_parts)
        if width > 1:
            # Slide a window of the pattern's length over the path; the
            # range is empty when the path is shorter than the pattern.
            if any(
                path_parts[start : start + width] == pattern_parts
                for start in range(len(path_parts) - width + 1)
            ):
                return True
        elif path.name == pattern:
            return True
        elif width == 1 and path.name == pattern_parts[0]:
            # The raw pattern differs from its single parsed component
            # (e.g. a trailing separator); compare against the component.
            return True
    return False
132
133
134
def remove_file(fileobj):
    """
    Delete a file for real and record the outcome on the runner.

    An ``OSError`` raised by the deletion is logged at debug level and
    counted as a failure instead of being propagated.
    """
    log.debug('Deleting file: %s', fileobj)
    try:
        fileobj.unlink()
    except OSError as err:
        log.debug('File not deleted. %s', err)
        Runner.unlink_failed += 1
    else:
        Runner.unlink_count += 1
143
144
145
def remove_directory(dirobj):
    """
    Remove a directory for real and record the outcome on the runner.

    An ``OSError`` raised by the removal is logged at debug level and
    counted as a failure instead of being propagated.
    """
    log.debug('Removing directory: %s', dirobj)
    try:
        dirobj.rmdir()
    except OSError as err:
        log.debug('Directory not removed. %s', err)
        Runner.rmdir_failed += 1
    else:
        Runner.rmdir_count += 1
154
155
156
def print_filename(fileobj):
    """
    Dry-run stand-in for file deletion.

    Logs the file that would be deleted and counts it; nothing is removed.
    """
    log.debug('Would delete file: %s', fileobj)
    Runner.unlink_count += 1
160
161
162
def print_dirname(dirobj):
    """
    Dry-run stand-in for directory removal.

    Logs the directory that would be deleted and counts it; nothing is
    removed.
    """
    log.debug('Would delete directory: %s', dirobj)
    Runner.rmdir_count += 1
166
167
168
def pyclean(args):
    """
    Run the complete cleanup for every directory in ``args.directory``.

    Per directory: bytecode is removed first, then the debris of each
    requested topic, then the free-form ``--erase`` targets, and finally
    (only with ``args.folders``) empty directories. Afterwards the totals
    are logged and, if no debris topic was requested, a hint about the
    ``--debris`` option is emitted.
    """
    Runner.configure(args)

    for dir_name in args.directory:
        root = Path(dir_name)
        log.info('Cleaning directory %s', root)

        descend_and_clean(root, BYTECODE_FILES, BYTECODE_DIRS)

        for topic in args.debris:
            remove_debris_for(topic, root)

        remove_freeform_targets(root, args.erase, args.yes, args.dry_run)

        if args.folders:
            log.debug('Removing empty directories...')
            remove_empty_directories(root)

    outcome = 'would be removed' if args.dry_run else 'removed'
    log.info(
        'Total %d files, %d directories %s.',
        Runner.unlink_count,
        Runner.rmdir_count,
        outcome,
    )

    if Runner.unlink_failed or Runner.rmdir_failed:
        modal_verb = 'would' if args.dry_run else 'could'
        log.debug(
            '%d files, %d directories %s not be removed.',
            Runner.unlink_failed,
            Runner.rmdir_failed,
            modal_verb,
        )

    # Point the user at --debris when no topic was requested.
    if not args.debris:
        suggest_debris_option(args)
205
206
207
def descend_and_clean(directory, file_types, dir_names):
    """
    Recursively clean a directory tree of files with the given suffixes.

    Children are visited in sorted order. Files whose suffix is listed in
    ``file_types`` are handed to the runner's unlink callable. Directories
    are descended into first (unless ignored), and afterwards handed to the
    runner's rmdir callable when their name is listed in ``dir_names``.
    """
    for entry in sorted(directory.iterdir()):
        if entry.is_file():
            if entry.suffix in file_types:
                Runner.unlink(entry)
            continue

        if not entry.is_dir():
            log.debug('Ignoring %s (neither a file nor a folder)', entry)
            continue

        if should_ignore(entry, Runner.ignore):
            log.debug('Skipping %s', entry)
        else:
            descend_and_clean(entry, file_types, dir_names)

        # Runs for ignored directories too, exactly like for visited ones.
        if entry.name in dir_names:
            Runner.rmdir(entry)
226
227
228
def remove_debris_for(topic, directory):
    """
    Delete the debris belonging to one topic below ``directory``.

    ``topic`` must be a key of ``DEBRIS_TOPICS``; an unknown topic raises
    ``KeyError``.
    """
    log.debug('Scanning for debris of %s ...', topic.title())
    recursive_delete_debris(directory, DEBRIS_TOPICS[topic])
236
237
238
def remove_empty_directories(directory):
    """
    Remove empty directories below ``directory``, deepest ones first.

    Each non-ignored subdirectory is processed recursively before it is
    checked itself, so a directory that becomes empty through the removal
    of its children is handed to the runner's rmdir callable as well.
    ``directory`` itself is never removed. Access errors are logged, not
    raised.
    """
    try:
        subdirs = [entry for entry in directory.iterdir() if entry.is_dir()]
    except OSError as err:
        log.warning('Cannot access directory %s: %s', directory, err)
        return

    for subdir in subdirs:
        if should_ignore(subdir, Runner.ignore):
            log.debug('Skipping %s', subdir)
            continue

        remove_empty_directories(subdir)  # children first (post-order)
        try:
            if not any(subdir.iterdir()):
                Runner.rmdir(subdir)
        except OSError as err:
            log.debug('Cannot check or remove directory %s: %s', subdir, err)
261
262
263
def remove_freeform_targets(directory, glob_patterns, yes, dry_run=False):
    """
    Erase user-supplied targets, given as glob patterns.

    This is **potentially dangerous**: with a careless pattern a user can
    wipe anything below ``directory``, up to the whole project. The
    implementation therefore deliberately stays restrictive:

    - Directories are never deleted recursively; their contents must be
      matched explicitly (e.g. ``dirname/**/*``).
    - The order of the patterns is the order of deletion, so the user must
      make sure a directory is already empty when its turn comes.
    - Every single file system object is confirmed interactively, unless
      ``yes`` is set.
    """
    ask_first = not yes
    for path_glob in glob_patterns:
        log.debug('Erase file system objects matching: %s', path_glob)
        delete_filesystem_objects(
            directory, path_glob, prompt=ask_first, dry_run=dry_run
        )
282
283
284
def recursive_delete_debris(directory, patterns):
    """
    Recursively delete debris matching any of the given patterns.

    All patterns are applied to ``directory`` first, in the given order.
    Then every remaining subdirectory that is not ignored is processed the
    same way.

    The subdirectories are collected into a list before descending, and the
    ``os.scandir`` iterator is closed right away. This keeps at most one
    directory handle open regardless of the depth of the tree, and makes
    sure that an ``OSError`` raised while reading the directory is caught
    here (logged as a warning) instead of surfacing mid-iteration.
    """
    for pattern in patterns:
        delete_filesystem_objects(directory, pattern)

    try:
        with os.scandir(directory) as entries:
            subdirs = [Path(entry.path) for entry in entries if entry.is_dir()]
    except OSError as err:
        log.warning('Cannot access directory %s: %s', directory, err)
        return

    for subdir in subdirs:
        if should_ignore(subdir, Runner.ignore):
            log.debug('Skipping %s', subdir)
        else:
            recursive_delete_debris(subdir, patterns)
307
308
309
def delete_filesystem_objects(directory, path_glob, prompt=False, dry_run=False):
    """
    Delete everything below ``directory`` that matches ``path_glob``.

    With ``prompt`` set (and ``dry_run`` unset) each object is confirmed
    interactively first; a declined object is counted as failed.

    Implementation Note: The matches are sorted in *reverse order* and
    *all files* (symlinks count as files) are handled before any
    directory. This way the directories deepest down in the hierarchy are
    already empty when their removal is attempted.
    """

    def is_real_dir(candidate):
        # A symlink pointing to a directory is unlinked like a file.
        return candidate.is_dir() and not candidate.is_symlink()

    matches = sorted(directory.glob(path_glob), reverse=True)
    ask = not dry_run and prompt

    # Both filters are lazy: each path is classified only when its turn comes.
    for file_object in (match for match in matches if not is_real_dir(match)):
        file_type = 'symlink' if file_object.is_symlink() else 'file'
        if ask and not confirm('Delete %s %s' % (file_type, file_object)):
            Runner.unlink_failed += 1
        else:
            Runner.unlink(file_object)

    for dir_object in (match for match in matches if is_real_dir(match)):
        if ask and not confirm('Remove empty directory %s' % dir_object):
            Runner.rmdir_failed += 1
        else:
            Runner.rmdir(dir_object)
343
344
345
def confirm(message):
    """
    Ask an interactive yes/no question and return the answer.

    The prompt shown is ``message`` followed by ``'? '``. Only the answers
    ``y`` and ``yes`` (any letter case, surrounding whitespace ignored)
    count as consent; everything else returns ``False``.

    Raises ``SystemExit`` when the user interrupts the prompt or when no
    answer can be read because the input stream is at end-of-file. Aborting
    is the safe reaction for a deletion prompt that cannot be answered.
    """
    try:
        answer = input('%s? ' % message)
    except (KeyboardInterrupt, EOFError):
        msg = 'Aborted by user.'
        raise SystemExit(msg) from None
    return answer.strip().lower() in ('y', 'yes')
353
354
355
def detect_debris_in_directory(directory):
    """
    Return the debris topics that have artifacts directly in ``directory``.

    Only the non-recursive patterns of a topic (those without ``**``) are
    probed, and a topic is reported as soon as one of them matches. The
    result follows the order of ``DEBRIS_TOPICS``.
    """
    return [
        topic
        for topic, patterns in DEBRIS_TOPICS.items()
        if any(
            list(directory.glob(pattern))
            for pattern in patterns
            if '**' not in pattern
        )
    ]
373
374
375
def suggest_debris_option(args):
    """
    Log a hint about the --debris option.

    All existing directories in ``args.directory`` are scanned for known
    debris. If anything is found the hint names the detected topics,
    otherwise a general hint is logged.
    """
    found_topics = set()
    for dir_path in map(Path, args.directory):
        if not dir_path.exists():
            continue
        found_topics |= set(detect_debris_in_directory(dir_path))

    if not found_topics:
        # Nothing detected: fall back to the general suggestion.
        log.info(
            'Hint: Use --debris to also clean up build artifacts '
            'from common Python development tools.',
        )
        return

    # Targeted suggestion listing the detected topics alphabetically.
    log.info(
        'Hint: Use --debris to also clean up build artifacts. Detected: %s',
        ' '.join(sorted(found_topics)),
    )
401