Passed
Pull Request — main (#93)
by
unknown
01:22
created

pyclean.modern   B

Complexity

Total Complexity 50

Size/Duplication

Total Lines 297
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 50
eloc 173
dl 0
loc 297
rs 8.4
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A CleanupRunner.configure() 0 9 3
A CleanupRunner.__init__() 0 9 1

11 Functions

Rating   Name   Duplication   Size   Complexity  
A remove_directory() 0 9 2
A confirm() 0 8 2
C delete_filesystem_objects() 0 34 11
A print_filename() 0 4 1
A remove_file() 0 9 2
B descend_and_clean() 0 19 7
A print_dirname() 0 4 1
A remove_freeform_targets() 0 19 2
A remove_debris_for() 0 8 2
B pyclean() 0 28 7
C should_ignore() 0 41 9

How to fix   Complexity   

Complexity

Complex classes like pyclean.modern often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# SPDX-FileCopyrightText: 2020 Peter Bittner <[email protected]>
2
#
3
# SPDX-License-Identifier: GPL-3.0-or-later
4
5
"""
6
Modern, cross-platform, pure-Python pyclean implementation.
7
"""
8
9
import logging
10
import os
11
from pathlib import Path
12
13
# File suffixes and directory names that are treated as Python bytecode.
BYTECODE_FILES = ['.pyc', '.pyo']
BYTECODE_DIRS = ['__pycache__']

# Glob patterns to erase, keyed by debris topic. Where a topic lists both
# the contents of a directory and the directory itself, the contents
# pattern comes first.
DEBRIS_TOPICS = {
    'cache': ['.cache/**/*', '.cache/'],
    'coverage': [
        '.coverage',
        'coverage.json',
        'coverage.lcov',
        'coverage.xml',
        'htmlcov/**/*',
        'htmlcov/',
    ],
    'jupyter': ['.ipynb_checkpoints/**/*', '.ipynb_checkpoints/'],
    'mypy': ['.mypy_cache/**/*', '.mypy_cache/'],
    'package': [
        'build/bdist.*/**/*',
        'build/bdist.*/',
        'build/lib/**/*',
        'build/lib/',
        'build/',
        'dist/**/*',
        'dist/',
        'sdist/**/*',
        'sdist/',
        '*.egg-info/**/*',
        '*.egg-info/',
    ],
    'pytest': ['.pytest_cache/**/*', '.pytest_cache/', 'pytestdebug.log'],
    'ruff': ['.ruff_cache/**/*', '.ruff_cache/'],
    'tox': ['.tox/**/*', '.tox/'],
}
63
64
65
class CleanupRunner:
    """Holds the active deletion callables, the ignore list and the counters."""

    def __init__(self):
        """Create an unconfigured runner; every attribute starts as ``None``."""
        self.unlink = self.rmdir = None
        self.ignore = None
        self.unlink_count = self.unlink_failed = None
        self.rmdir_count = self.rmdir_failed = None

    def configure(self, args):
        """
        Pick the deletion callables and reset all counters.

        With ``args.dry_run`` set, the printing variants are installed,
        otherwise the variants that really delete. ``args.ignore`` is
        stored as the ignore list.
        """
        if args.dry_run:
            self.unlink, self.rmdir = print_filename, print_dirname
        else:
            self.unlink, self.rmdir = remove_file, remove_directory
        self.ignore = args.ignore
        self.unlink_count = self.unlink_failed = 0
        self.rmdir_count = self.rmdir_failed = 0
87
88
89
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
90
# Single shared runner instance; pyclean() sets it up via Runner.configure(args).
Runner = CleanupRunner()
91
92
93
def should_ignore(path, ignore_patterns):
    """
    Tell whether ``path`` matches one of the ignore patterns.

    Two kinds of patterns are understood:

    - A plain name such as ``bar`` matches when the last component of
      the path is exactly that name.
    - A pattern containing ``/`` such as ``foo/bar`` matches when the
      trailing components of the path equal the pattern's components.

    Args:
        path: Path object to check
        ignore_patterns: List of ignore patterns (``None`` or empty
            means nothing is ignored)

    Returns:
        True if the path should be ignored, False otherwise
    """
    if not ignore_patterns:
        return False

    for pattern in ignore_patterns:
        if '/' not in pattern:
            # Plain name: compare against the final path component only.
            if path.name == pattern:
                return True
            continue

        # Pattern with separators: compare it with the tail of the path.
        try:
            wanted = Path(pattern).parts
            if len(path.parts) < len(wanted):
                continue
            if path.parts[-len(wanted):] == wanted:
                return True
        except (ValueError, IndexError):
            continue

    return False
134
135
136
def remove_file(fileobj):
    """
    Unlink ``fileobj`` and record the outcome on the shared runner.

    An ``OSError`` is logged and counted as a failure, not raised.
    """
    log.debug('Deleting file: %s', fileobj)
    try:
        fileobj.unlink()
    except OSError as err:
        log.debug('File not deleted. %s', err)
        Runner.unlink_failed += 1
    else:
        Runner.unlink_count += 1
145
146
147
def remove_directory(dirobj):
    """
    Remove the directory ``dirobj`` and record the outcome on the shared runner.

    An ``OSError`` is logged and counted as a failure, not raised.
    """
    log.debug('Removing directory: %s', dirobj)
    try:
        dirobj.rmdir()
    except OSError as err:
        log.debug('Directory not removed. %s', err)
        Runner.rmdir_failed += 1
    else:
        Runner.rmdir_count += 1
156
157
158
def print_filename(fileobj):
    """
    Dry-run stand-in for file deletion.

    Logs the file that would be deleted and counts it; nothing is removed.
    """
    log.debug('Would delete file: %s', fileobj)
    Runner.unlink_count += 1
162
163
164
def print_dirname(dirobj):
    """
    Dry-run stand-in for directory removal.

    Logs the directory that would be deleted and counts it; nothing is
    removed.
    """
    log.debug('Would delete directory: %s', dirobj)
    Runner.rmdir_count += 1
168
169
170
def pyclean(args):
    """
    Clean bytecode, topic debris and free-form targets, then log totals.

    For every entry of ``args.directory`` the bytecode files and folders
    are cleaned first, then the debris of each topic in ``args.debris``,
    and finally the glob patterns given in ``args.erase``.
    """
    Runner.configure(args)

    for dir_path in map(Path, args.directory):
        log.info('Cleaning directory %s', dir_path)
        descend_and_clean(dir_path, BYTECODE_FILES, BYTECODE_DIRS)

        for topic in args.debris:
            remove_debris_for(topic, dir_path)

        remove_freeform_targets(args.erase, args.yes, dir_path)

    outcome = 'would be removed' if args.dry_run else 'removed'
    log.info(
        'Total %d files, %d directories %s.',
        Runner.unlink_count,
        Runner.rmdir_count,
        outcome,
    )

    # Only report failures when there is at least one.
    if not (Runner.unlink_failed or Runner.rmdir_failed):
        return

    modal_verb = 'would' if args.dry_run else 'could'
    log.debug(
        '%d files, %d directories %s not be removed.',
        Runner.unlink_failed,
        Runner.rmdir_failed,
        modal_verb,
    )
199
200
201
def descend_and_clean(directory, file_types, dir_names):
    """
    Walk and descend a directory tree, cleaning up files of a certain type
    along the way. Only delete directories if they are empty, in the end.

    Files whose suffix is in ``file_types`` are handed to ``Runner.unlink``;
    directories whose name is in ``dir_names`` are handed to
    ``Runner.rmdir`` after their contents have been processed.

    Directories matched by the ignore list are skipped entirely: they are
    neither descended into nor removed. A directory that cannot be listed
    (e.g. for lack of permission) is reported and skipped instead of
    aborting the whole run.
    """
    try:
        children = sorted(directory.iterdir())
    except OSError as err:
        log.warning('Cannot access directory %s: %s', directory, err)
        return

    for child in children:
        if child.is_file():
            if child.suffix in file_types:
                Runner.unlink(child)
        elif child.is_dir():
            if should_ignore(child, Runner.ignore):
                log.debug('Skipping %s', child)
                # An ignored directory must be left alone, even when its
                # name is one of the directory names to clean up.
                continue

            descend_and_clean(child, file_types, dir_names)

            if child.name in dir_names:
                Runner.rmdir(child)
        else:
            log.debug('Ignoring %s (neither a file nor a folder)', child)
220
221
222
def remove_debris_for(topic, directory):
    """
    Erase everything matching the glob patterns registered for ``topic``.

    The patterns are applied to ``directory`` recursively, in the order
    they are listed in ``DEBRIS_TOPICS``.
    """
    log.debug('Scanning for debris of %s ...', topic.title())

    topic_globs = DEBRIS_TOPICS[topic]
    for path_glob in topic_globs:
        delete_filesystem_objects(directory, path_glob, recursive=True)
230
231
232
def remove_freeform_targets(glob_patterns, yes, directory):
    """
    Erase arbitrary, user-supplied targets selected by glob patterns.

    This is **potentially dangerous**: a user can delete anything in
    their file system, including the project they are working on. To
    limit the damage, the implementation imposes these restrictions:

    - Directories are not deleted recursively; their contents must be
      matched explicitly by a glob (e.g. ``dirname/**/*``).
    - The deletion order is up to the user, so a directory has to be
      empty by the time its removal is attempted.
    - Every single file system object is confirmed interactively, unless
      ``yes`` is set (the ``--yes`` option).
    """
    ask_first = not yes

    for path_glob in glob_patterns:
        log.debug('Erase file system objects matching: %s', path_glob)
        delete_filesystem_objects(directory, path_glob, prompt=ask_first)
251
252
253
def delete_filesystem_objects(directory, path_glob, prompt=False, recursive=False):
    """
    Identifies all pathnames matching a specific glob pattern, and attempts
    to delete them in the proper order, optionally asking for confirmation.

    With ``prompt`` set, every object is confirmed interactively; a
    declined object is counted as failed. With ``recursive`` set, the same
    pattern is applied again inside every subdirectory of ``directory``
    that is not matched by the ignore list.

    Implementation Note: We sort the file system objects in *reverse order*
    and first delete *all files* before removing directories. This way we
    make sure that the directories that are deepest down in the hierarchy
    are empty (for both files & directories) when we attempt to remove them.
    Symlinks, including those pointing to directories, are treated as files.
    """
    all_names = sorted(directory.glob(path_glob), reverse=True)
    dirs = (name for name in all_names if name.is_dir() and not name.is_symlink())
    files = (name for name in all_names if not name.is_dir() or name.is_symlink())

    for file_object in files:
        file_type = 'symlink' if file_object.is_symlink() else 'file'
        if prompt and not confirm('Delete %s %s' % (file_type, file_object)):
            Runner.unlink_failed += 1
            continue
        Runner.unlink(file_object)

    for dir_object in dirs:
        if prompt and not confirm('Remove empty directory %s' % dir_object):
            Runner.rmdir_failed += 1
            continue
        Runner.rmdir(dir_object)

    if not recursive:
        return

    # Take a complete snapshot of the subdirectories and close the scandir
    # handle before recursing: the recursion deletes entries, and a lazily
    # consumed iterator would stay open at every level of the tree.
    try:
        with os.scandir(directory) as entries:
            subdirs = [Path(entry.path) for entry in entries if entry.is_dir()]
    except OSError as err:
        log.debug('Cannot scan directory %s: %s', directory, err)
        return

    for subdir in subdirs:
        if should_ignore(subdir, Runner.ignore):
            log.debug('Skipping %s', subdir)
        else:
            delete_filesystem_objects(subdir, path_glob, prompt, recursive)
287
288
289
def confirm(message):
    """
    An interactive confirmation prompt.

    Shows ``message`` followed by ``? `` and reads one line of input.

    Returns:
        True if the answer is ``y`` or ``yes`` (case-insensitive,
        surrounding whitespace ignored), False for any other answer.

    Raises:
        SystemExit: If the prompt is interrupted (Ctrl-C) or the input
            stream ends (Ctrl-D, closed or exhausted stdin).
    """
    try:
        answer = input('%s? ' % message)
    except (KeyboardInterrupt, EOFError):
        # End of input is an abort too; without this it would surface as
        # an unhandled EOFError.
        msg = 'Aborted by user.'
        raise SystemExit(msg)
    return answer.strip().lower() in ['y', 'yes']
297