Passed
Pull Request — main (#93)
by
unknown
01:13
created

pyclean.modern._normalize_pattern()   A

Complexity

Conditions 2

Size

Total Lines 18
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 4
nop 1
dl 0
loc 18
rs 10
c 0
b 0
f 0
1
# SPDX-FileCopyrightText: 2020 Peter Bittner <[email protected]>
2
#
3
# SPDX-License-Identifier: GPL-3.0-or-later
4
5
"""
6
Modern, cross-platform, pure-Python pyclean implementation.
7
"""
8
9
import logging
10
import os
11
from pathlib import Path
12
13
# File suffixes and directory names that hold compiled Python bytecode.
BYTECODE_FILES = ['.pyc', '.pyo']
BYTECODE_DIRS = ['__pycache__']

# Glob patterns per debris topic. For directory-like debris the contents
# pattern (``name/**/*``) is listed before the directory pattern (``name/``).
DEBRIS_TOPICS = {
    'cache': ['.cache/**/*', '.cache/'],
    'coverage': [
        '.coverage',
        'coverage.json',
        'coverage.lcov',
        'coverage.xml',
        'htmlcov/**/*',
        'htmlcov/',
    ],
    'jupyter': ['.ipynb_checkpoints/**/*', '.ipynb_checkpoints/'],
    'mypy': ['.mypy_cache/**/*', '.mypy_cache/'],
    'package': [
        'build/bdist.*/**/*',
        'build/bdist.*/',
        'build/lib/**/*',
        'build/lib/',
        'build/',
        'dist/**/*',
        'dist/',
        'sdist/**/*',
        'sdist/',
        '*.egg-info/**/*',
        '*.egg-info/',
    ],
    'pytest': ['.pytest_cache/**/*', '.pytest_cache/', 'pytestdebug.log'],
    'ruff': ['.ruff_cache/**/*', '.ruff_cache/'],
    'tox': ['.tox/**/*', '.tox/'],
}
63
64
65
class CleanupRunner:
    """Holder for the removal callbacks, the ignore list and the counters."""

    def __init__(self):
        """Create an unconfigured runner; every attribute starts as None."""
        self.unlink = self.rmdir = None
        self.ignore = None
        self.unlink_count = self.unlink_failed = None
        self.rmdir_count = self.rmdir_failed = None

    def configure(self, args):
        """
        Apply the parsed command line options and reset all counters to zero.

        With ``args.dry_run`` set, the callbacks are the ``print_*`` functions;
        otherwise they are the ``remove_*`` functions that delete for real.
        """
        if args.dry_run:
            self.unlink, self.rmdir = print_filename, print_dirname
        else:
            self.unlink, self.rmdir = remove_file, remove_directory
        self.ignore = args.ignore
        self.unlink_count = self.unlink_failed = 0
        self.rmdir_count = self.rmdir_failed = 0


# Module logger and the single runner instance used by this module's functions.
log = logging.getLogger(__name__)
Runner = CleanupRunner()
91
92
93
def _normalize_pattern(pattern):
    """
    Return an ignore pattern with its path separators unified where safe.

    Only on Windows (``os.name == 'nt'``) are backslashes rewritten to
    forward slashes. Everywhere else the pattern is returned untouched,
    because a backslash may be a regular file name character there.

    Args:
        pattern: The ignore pattern string

    Returns:
        The pattern, with backslashes replaced by forward slashes on Windows
    """
    on_windows = os.name == 'nt'
    return pattern.replace('\\', '/') if on_windows else pattern
111
112
113
def should_ignore(path, ignore_patterns):
    """
    Check if a path should be ignored based on ignore patterns.

    Patterns can be:
    - Simple names like 'bar': matches when the last component of the path
      is exactly that name
    - Paths like 'foo/bar': matches when the components 'foo', 'bar' occur
      consecutively anywhere in the path, so everything inside such a
      directory is ignored as well

    A pattern that contains a separator but yields no path components
    (e.g. './') never matches. Without that guard its empty component
    tuple would compare equal to the empty slice at every position and
    every path would be ignored.

    Args:
        path: Path object to check
        ignore_patterns: List of ignore patterns (may be empty or None)

    Returns:
        True if the path should be ignored, False otherwise
    """
    if not ignore_patterns:
        return False

    path_parts = path.parts  # invariant across all patterns
    for pattern in ignore_patterns:
        # Normalize pattern for cross-platform support
        normalized_pattern = _normalize_pattern(pattern)

        has_sep = os.sep in normalized_pattern
        has_altsep = os.altsep and os.altsep in normalized_pattern
        if not (has_sep or has_altsep):
            # Simple name - compare against the last path component only
            if path.name == pattern:
                return True
            continue

        # Pattern contains a path separator - compare component sequences
        try:
            pattern_parts = Path(normalized_pattern).parts
        except (ValueError, IndexError):
            # Defensive: skip a pattern that cannot be split into parts
            continue

        width = len(pattern_parts)
        if not width:
            # Nothing to compare; see the note in the docstring
            continue

        # Slide a window of the pattern's length over the path components.
        # The range is empty when the path is shorter than the pattern.
        last_start = len(path_parts) - width
        if any(
            path_parts[start : start + width] == pattern_parts
            for start in range(last_start + 1)
        ):
            return True
    return False
159
160
161
def remove_file(fileobj):
    """Unlink ``fileobj`` and record success or failure on the Runner."""
    log.debug('Deleting file: %s', fileobj)
    try:
        fileobj.unlink()
    except OSError as err:
        log.debug('File not deleted. %s', err)
        Runner.unlink_failed += 1
    else:
        Runner.unlink_count += 1
170
171
172
def remove_directory(dirobj):
    """Call ``dirobj.rmdir()`` and record success or failure on the Runner."""
    log.debug('Removing directory: %s', dirobj)
    try:
        dirobj.rmdir()
    except OSError as err:
        log.debug('Directory not removed. %s', err)
        Runner.rmdir_failed += 1
    else:
        Runner.rmdir_count += 1
181
182
183
def print_filename(fileobj):
    """Dry-run stand-in for file deletion: count the file and log its name."""
    Runner.unlink_count += 1
    log.debug('Would delete file: %s', fileobj)
187
188
189
def print_dirname(dirobj):
    """Dry-run stand-in for directory removal: count it and log its name."""
    Runner.rmdir_count += 1
    log.debug('Would delete directory: %s', dirobj)
193
194
195
def pyclean(args):
    """
    Clean bytecode, debris topics and free-form targets in each directory.

    Configures the Runner from ``args``, processes every entry of
    ``args.directory`` in turn, logs the totals and, when no debris topic
    was requested, finishes with a hint about the --debris option.
    """
    Runner.configure(args)

    for dir_path in map(Path, args.directory):
        log.info('Cleaning directory %s', dir_path)
        descend_and_clean(dir_path, BYTECODE_FILES, BYTECODE_DIRS)

        for topic in args.debris:
            remove_debris_for(topic, dir_path)

        remove_freeform_targets(args.erase, args.yes, dir_path)

    outcome = 'would be removed' if args.dry_run else 'removed'
    log.info(
        'Total %d files, %d directories %s.',
        Runner.unlink_count,
        Runner.rmdir_count,
        outcome,
    )

    if Runner.unlink_failed or Runner.rmdir_failed:
        modal_verb = 'would' if args.dry_run else 'could'
        log.debug(
            '%d files, %d directories %s not be removed.',
            Runner.unlink_failed,
            Runner.rmdir_failed,
            modal_verb,
        )

    # Only hint at --debris when the user did not already ask for it
    if not args.debris:
        suggest_debris_option(args)
228
229
230
def descend_and_clean(directory, file_types, dir_names):
    """
    Walk and descend a directory tree, cleaning up files of a certain type
    along the way. Only delete directories if they are empty, in the end.

    Children are visited in sorted order. Files whose suffix is listed in
    ``file_types`` are handed to ``Runner.unlink``. A directory is first
    descended into, then handed to ``Runner.rmdir`` when its name is listed
    in ``dir_names``.

    A directory matched by ``Runner.ignore`` is left alone completely: it
    is neither descended into nor passed to ``Runner.rmdir``, even if its
    name is listed in ``dir_names``.
    """
    for child in sorted(directory.iterdir()):
        if child.is_file():
            if child.suffix in file_types:
                Runner.unlink(child)
        elif child.is_dir():
            if should_ignore(child, Runner.ignore):
                # Ignored means untouched: no descent and no removal attempt
                log.debug('Skipping %s', child)
                continue

            descend_and_clean(child, file_types, dir_names)

            # Remove only after the contents were handled by the descent
            if child.name in dir_names:
                Runner.rmdir(child)
        else:
            log.debug('Ignoring %s (neither a file nor a folder)', child)
249
250
251
def remove_debris_for(topic, directory):
    """
    Delete whatever matches the glob patterns registered for ``topic``.

    The patterns are looked up in ``DEBRIS_TOPICS`` and each one is applied
    to ``directory`` with recursion into subdirectories enabled.
    """
    log.debug('Scanning for debris of %s ...', topic.title())

    topic_patterns = DEBRIS_TOPICS[topic]
    for pattern in topic_patterns:
        delete_filesystem_objects(directory, pattern, recursive=True)
259
260
261
def remove_freeform_targets(glob_patterns, yes, directory):
    """
    Erase user-supplied glob targets below ``directory``.

    This is **potentially dangerous**: a glob can match anything, up to
    and including the whole project the user is working on. To keep the
    user in control, the following restrictions apply:

    - Nothing is deleted recursively. The contents of a directory have
      to be named explicitly with a glob (e.g. ``dirname/**/*``).
    - The deletion order is up to the user, i.e. a directory has to be
      empty by the time its own pattern is processed.
    - Every single file system object is confirmed interactively, unless
      ``yes`` is true (the ``--yes`` option).
    """
    ask_first = not yes
    for pattern in glob_patterns:
        log.debug('Erase file system objects matching: %s', pattern)
        delete_filesystem_objects(directory, pattern, prompt=ask_first)
280
281
282
def delete_filesystem_objects(directory, path_glob, prompt=False, recursive=False):
    """
    Delete everything below ``directory`` that matches ``path_glob``.

    The matches are sorted in *reverse order* and processed in two passes:
    first every file and symlink, then every real directory. That way the
    directories deepest down in the hierarchy are already empty (of both
    files and directories) when their removal is attempted.

    With ``prompt`` set, each object is confirmed interactively and a
    declined object is counted as failed. With ``recursive`` set, the same
    pattern is applied again inside every subdirectory of ``directory``
    that is not matched by ``Runner.ignore``.
    """
    matches = sorted(directory.glob(path_glob), reverse=True)

    def is_real_dir(entry):
        # A symlink pointing at a directory is treated like a file
        return entry.is_dir() and not entry.is_symlink()

    # Pass 1: files and symlinks
    for entry in matches:
        if is_real_dir(entry):
            continue
        kind = 'symlink' if entry.is_symlink() else 'file'
        if prompt and not confirm('Delete %s %s' % (kind, entry)):
            Runner.unlink_failed += 1
            continue
        Runner.unlink(entry)

    # Pass 2: real directories, which pass 1 has emptied of files
    for entry in matches:
        if not is_real_dir(entry):
            continue
        if prompt and not confirm('Remove empty directory %s' % entry):
            Runner.rmdir_failed += 1
            continue
        Runner.rmdir(entry)

    if not recursive:
        return

    for scanned in os.scandir(directory):
        if not scanned.is_dir():
            continue
        subdir = Path(scanned.path)
        if should_ignore(subdir, Runner.ignore):
            log.debug('Skipping %s', subdir)
        else:
            delete_filesystem_objects(subdir, path_glob, prompt, recursive)
316
317
318
def confirm(message):
    """
    Ask ``message`` as a question on the console.

    Returns True when the answer is 'y' or 'yes' (case and surrounding
    whitespace are disregarded), False for any other answer. Pressing
    Ctrl+C at the prompt exits via SystemExit.
    """
    question = '%s? ' % message
    try:
        reply = input(question)
    except KeyboardInterrupt:
        raise SystemExit('Aborted by user.')
    return reply.strip().lower() in ['y', 'yes']
326
327
328
def detect_debris_in_directory(directory):
    """
    Return the debris topics that have at least one match in ``directory``.

    Only patterns without ``**`` are tried. A topic is reported once, as
    soon as its first pattern matches something.
    """
    found_topics = []

    for topic, patterns in DEBRIS_TOPICS.items():
        # Patterns containing ** serve the recursive cleanup; skip them here
        shallow_patterns = (entry for entry in patterns if '**' not in entry)
        # any() stops at the first pattern that yields a non-empty match list
        if any(list(directory.glob(entry)) for entry in shallow_patterns):
            found_topics.append(topic)

    return found_topics
346
347
348
def suggest_debris_option(args):
    """
    Log a hint about the --debris option.

    Every existing directory in ``args.directory`` is scanned for debris.
    If any topic is detected, the hint names the detected topics;
    otherwise a general hint is logged.
    """
    found_topics = set()
    for dir_path in map(Path, args.directory):
        if not dir_path.exists():
            continue
        found_topics.update(detect_debris_in_directory(dir_path))

    if not found_topics:
        # Nothing detected: fall back to the general suggestion
        log.info(
            'Hint: Use --debris to also clean up build artifacts '
            'from common Python development tools.',
        )
        return

    # Targeted suggestion listing the detected topics alphabetically
    log.info(
        'Hint: Use --debris to also clean up build artifacts. Detected: %s',
        ' '.join(sorted(found_topics)),
    )
374