Passed
Pull Request — main (#105)
by
unknown
01:11
created

pyclean.modern.normalize()   A

Complexity

Conditions 1

Size

Total Lines 8
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
# SPDX-FileCopyrightText: 2020 Peter Bittner <[email protected]>
2
#
3
# SPDX-License-Identifier: GPL-3.0-or-later
4
5
"""
6
Modern, cross-platform, pure-Python pyclean implementation.
7
"""
8
9
import logging
10
import os
11
import subprocess
12
from pathlib import Path
13
14
# File suffixes that identify compiled Python bytecode.
BYTECODE_FILES = ['.pyc', '.pyo']

# Directory names that hold bytecode caches.
BYTECODE_DIRS = ['__pycache__']

# Glob patterns per debris topic. Within a topic the content patterns
# (``**/*``) come before the pattern for the directory itself, so that a
# directory has been emptied by the time its own pattern is processed.
DEBRIS_TOPICS = {
    'cache': ['.cache/**/*', '.cache/'],
    'coverage': [
        '.coverage',
        'coverage.json',
        'coverage.lcov',
        'coverage.xml',
        'htmlcov/**/*',
        'htmlcov/',
    ],
    'jupyter': ['.ipynb_checkpoints/**/*', '.ipynb_checkpoints/'],
    'mypy': ['.mypy_cache/**/*', '.mypy_cache/'],
    'package': [
        'build/bdist.*/**/*',
        'build/bdist.*/',
        'build/lib/**/*',
        'build/lib/',
        'build/',
        'dist/**/*',
        'dist/',
        'sdist/**/*',
        'sdist/',
        '*.egg-info/**/*',
        '*.egg-info/',
    ],
    'pytest': ['.pytest_cache/**/*', '.pytest_cache/', 'pytestdebug.log'],
    'ruff': ['.ruff_cache/**/*', '.ruff_cache/'],
    'tox': ['.tox/**/*', '.tox/'],
}
64
65
66
class CleanupRunner:
    """Hold the active deletion callbacks, ignore patterns and counters."""

    def __init__(self):
        """Create an unconfigured runner; every attribute starts as ``None``."""
        # Deletion callbacks and ignore patterns, assigned by configure().
        self.unlink = self.rmdir = self.ignore = None
        # Success / failure tallies, reset to zero by configure().
        self.unlink_count = self.unlink_failed = None
        self.rmdir_count = self.rmdir_failed = None

    def configure(self, args):
        """
        Select the callbacks for this run and reset all counters.

        With ``args.dry_run`` set, the reporting callbacks are installed
        instead of the deleting ones. ``args.ignore`` is stored unchanged.
        """
        if args.dry_run:
            self.unlink, self.rmdir = print_filename, print_dirname
        else:
            self.unlink, self.rmdir = remove_file, remove_directory
        self.ignore = args.ignore
        self.unlink_count = self.unlink_failed = 0
        self.rmdir_count = self.rmdir_failed = 0


# Module-wide logger and the single runner instance shared by all helpers.
log = logging.getLogger(__name__)
Runner = CleanupRunner()
92
93
94
def normalize(path_pattern: str) -> str:
    """
    Rewrite the platform's primary path separator inside a pattern.

    Every occurrence of ``os.sep`` is replaced by ``os.altsep`` when the
    platform defines one (on Windows this turns backslashes into forward
    slashes). Where ``os.altsep`` is ``None`` the separator is replaced by
    itself, so the pattern comes back unchanged.
    """
    separator = os.altsep if os.altsep else os.sep
    return separator.join(path_pattern.split(os.sep))
102
103
104
def should_ignore(path: Path, ignore_patterns: list[str]) -> bool:
    """
    Check if a path should be ignored based on ignore patterns.

    Patterns can be:
    - Simple names like 'bar': match any path whose last component is
      'bar'. Redundant decoration that ``Path`` drops while parsing (a
      trailing separator as in 'bar/', a leading './') does not prevent
      the match.
    - Paths like 'foo/bar': match when the pattern's components appear
      consecutively anywhere in ``path.parts``, which also covers
      everything located inside that directory.

    Args:
        path: The path to test.
        ignore_patterns: Patterns to test against; may be empty or ``None``.

    Returns:
        ``True`` if at least one pattern matches, ``False`` otherwise.
    """
    if not ignore_patterns:
        return False

    path_parts = path.parts

    for pattern in ignore_patterns:
        pattern_parts = Path(normalize(pattern)).parts
        width = len(pattern_parts)

        if width > 1:
            # Slide a window of the pattern's length over the path. The
            # range is empty when the path is shorter than the pattern.
            if any(
                path_parts[start : start + width] == pattern_parts
                for start in range(len(path_parts) - width + 1)
            ):
                return True
        # Compare against the parsed component rather than the raw string,
        # so that 'bar/' and './bar' behave like 'bar'. Patterns that parse
        # to no component at all (e.g. '') fall back to the raw string.
        elif path.name == (pattern_parts[0] if pattern_parts else pattern):
            return True

    return False
133
134
135
def remove_file(fileobj):
    """
    Delete ``fileobj`` by calling its ``unlink()`` method.

    A successful deletion increments ``Runner.unlink_count``. An ``OSError``
    is logged at debug level and counted in ``Runner.unlink_failed``
    instead of being propagated.
    """
    log.debug('Deleting file: %s', fileobj)
    try:
        fileobj.unlink()
    except OSError as err:
        log.debug('File not deleted. %s', err)
        Runner.unlink_failed += 1
    else:
        Runner.unlink_count += 1
144
145
146
def remove_directory(dirobj):
    """
    Remove ``dirobj`` by calling its ``rmdir()`` method.

    A successful removal increments ``Runner.rmdir_count``. An ``OSError``
    is logged at debug level and counted in ``Runner.rmdir_failed``
    instead of being propagated.
    """
    log.debug('Removing directory: %s', dirobj)
    try:
        dirobj.rmdir()
    except OSError as err:
        log.debug('Directory not removed. %s', err)
        Runner.rmdir_failed += 1
    else:
        Runner.rmdir_count += 1
155
156
157
def print_filename(fileobj):
    """
    Report a file that would be deleted, without touching it.

    Installed as ``Runner.unlink`` for ``--dry-run``; the file is still
    counted in ``Runner.unlink_count`` so the summary reflects it.
    """
    log.debug('Would delete file: %s', fileobj)
    Runner.unlink_count += 1
161
162
163
def print_dirname(dirobj):
    """
    Report a directory that would be removed, without touching it.

    Installed as ``Runner.rmdir`` for ``--dry-run``; the directory is still
    counted in ``Runner.rmdir_count`` so the summary reflects it.
    """
    log.debug('Would delete directory: %s', dirobj)
    Runner.rmdir_count += 1
167
168
169
def pyclean(args):
    """
    Run the complete cleanup for every directory listed in ``args``.

    For each entry of ``args.directory`` this removes bytecode, then the
    debris of each requested topic, then the free-form ``args.erase``
    targets, and finally (with ``args.folders``) empty directories. After
    that a summary is logged, the ``--debris`` hint is shown if no topic
    was requested, and ``git clean`` is run last when ``args.git_clean``
    is set.

    Raises:
        SystemExit: with git's exit code when ``git clean`` fails with
            anything other than 128 (reported as "not under version
            control" and skipped).
    """
    Runner.configure(args)

    # Argument objects built by hand (e.g. in tests) may lack ``git_clean``.
    if not hasattr(args, 'git_clean'):
        args.git_clean = False

    for target in map(Path, args.directory):
        log.info('Cleaning directory %s', target)
        descend_and_clean(target, BYTECODE_FILES, BYTECODE_DIRS)

        for topic in args.debris:
            remove_debris_for(topic, target)

        remove_freeform_targets(target, args.erase, args.yes, args.dry_run)

        if args.folders:
            log.debug('Removing empty directories...')
            remove_empty_directories(target)

    outcome = 'would be removed' if args.dry_run else 'removed'
    log.info(
        'Total %d files, %d directories %s.',
        Runner.unlink_count,
        Runner.rmdir_count,
        outcome,
    )

    if Runner.unlink_failed or Runner.rmdir_failed:
        modal = 'would' if args.dry_run else 'could'
        log.debug(
            '%d files, %d directories %s not be removed.',
            Runner.unlink_failed,
            Runner.rmdir_failed,
            modal,
        )

    # Point users to --debris when they did not use it.
    if not args.debris:
        suggest_debris_option(args)

    # git clean is strictly the last step, and only on request.
    if not args.git_clean:
        return

    log.info('Running git clean...')
    for target in map(Path, args.directory):
        exit_code = run_git_clean(
            target,
            args.ignore,
            dry_run=args.dry_run,
            force=args.yes,
        )
        if exit_code == 0:
            continue
        if exit_code != 128:
            # Any other git failure ends the program immediately.
            raise SystemExit(exit_code)
        # Exit code 128: the directory is not under version control.
        log.warning(
            'Directory %s is not under version control. Skipping git clean.',
            target,
        )
232
233
234
def descend_and_clean(directory, file_types, dir_names):
    """
    Walk and descend a directory tree, cleaning up files of a certain type
    along the way. Only delete directories if they are empty, in the end.

    Args:
        directory: Path object of the directory to process.
        file_types: File suffixes (e.g. ``'.pyc'``) of files to delete.
        dir_names: Names of directories to remove after they were cleaned.

    Directories matched by ``Runner.ignore`` are left alone entirely: they
    are neither descended into nor removed. A directory that cannot be
    listed is reported with a warning and skipped, so one unreadable
    folder does not abort the whole run.
    """
    try:
        children = sorted(directory.iterdir())
    except OSError as err:
        log.warning('Cannot access directory %s: %s', directory, err)
        return

    for child in children:
        if child.is_file():
            if child.suffix in file_types:
                Runner.unlink(child)
        elif child.is_dir():
            if should_ignore(child, Runner.ignore):
                # Ignored means untouched: skip the removal attempt too,
                # otherwise it is counted as a failure (or, in dry-run
                # mode, as a directory that "would be removed").
                log.debug('Skipping %s', child)
                continue

            descend_and_clean(child, file_types, dir_names)

            # Remove the directory only after its contents were cleaned.
            if child.name in dir_names:
                Runner.rmdir(child)
        else:
            log.debug('Ignoring %s (neither a file nor a folder)', child)
253
254
255
def remove_debris_for(topic, directory):
    """
    Delete the debris belonging to one topic below ``directory``.

    ``topic`` must be a key of ``DEBRIS_TOPICS``; its patterns are handed
    to ``recursive_delete_debris``.
    """
    log.debug('Scanning for debris of %s ...', topic.title())
    recursive_delete_debris(directory, DEBRIS_TOPICS[topic])
263
264
265
def remove_empty_directories(directory):
    """
    Prune empty subdirectories below ``directory``, deepest ones first.

    Each subdirectory is processed recursively before it is checked
    itself, so a folder that only contained empty folders is removed as
    well. ``directory`` itself is never removed. Subdirectories matched
    by ``Runner.ignore`` are skipped together with everything below them.
    """
    subdirs = []
    try:
        for entry in os.scandir(directory):
            if entry.is_dir():
                subdirs.append(Path(entry.path))
    except OSError as err:
        log.warning('Cannot access directory %s: %s', directory, err)
        return

    for subdir in subdirs:
        if should_ignore(subdir, Runner.ignore):
            log.debug('Skipping %s', subdir)
            continue

        # Clean out the children first (post-order traversal).
        remove_empty_directories(subdir)
        try:
            # A single listed entry is enough to know it is not empty.
            if not any(subdir.iterdir()):
                Runner.rmdir(subdir)
        except OSError as err:
            log.debug('Cannot check or remove directory %s: %s', subdir, err)
290
291
292
def remove_freeform_targets(directory, glob_patterns, yes, dry_run=False):
    """
    Delete user-supplied targets, given as glob patterns.

    This is **potentially dangerous**: the patterns are free-form, so a
    user can delete anything, including the project being worked on. The
    implementation therefore deliberately keeps these restrictions:

    - Directories are not deleted recursively; their contents must be
      named explicitly by a glob (e.g. ``dirname/**/*``).
    - The deletion order is the user's responsibility: a directory must
      already be empty when its own pattern is processed.
    - Every single file system object is confirmed interactively, unless
      ``yes`` is set.
    """
    ask_first = not yes
    for path_glob in glob_patterns:
        log.debug('Erase file system objects matching: %s', path_glob)
        delete_filesystem_objects(
            directory,
            path_glob,
            prompt=ask_first,
            dry_run=dry_run,
        )
311
312
313
def recursive_delete_debris(directory, patterns):
    """
    Recursively delete debris matching any of the given patterns.

    All patterns are applied to ``directory`` first; afterwards the
    function recurses into every subdirectory that is not matched by
    ``Runner.ignore``.

    Args:
        directory: Path object of the directory to process.
        patterns: Glob patterns, applied in the given order.
    """
    for pattern in patterns:
        delete_filesystem_objects(directory, pattern)

    try:
        # Read the complete listing up front and close the scandir handle
        # before recursing. This keeps errors raised while iterating the
        # entries inside this ``try`` and avoids holding one open
        # directory handle per nesting level.
        with os.scandir(directory) as entries:
            subdirs = [Path(entry.path) for entry in entries if entry.is_dir()]
    except OSError as err:
        log.warning('Cannot access directory %s: %s', directory, err)
        return

    for subdir in subdirs:
        if should_ignore(subdir, Runner.ignore):
            log.debug('Skipping %s', subdir)
        else:
            recursive_delete_debris(subdir, patterns)
336
337
338
def delete_filesystem_objects(directory, path_glob, prompt=False, dry_run=False):
    """
    Delete everything below ``directory`` that matches ``path_glob``.

    The matches are sorted in *reverse order* and processed in two passes:
    first all files and symlinks, then the real directories. That way the
    deepest directories have been emptied (of files and of directories)
    by the time their own removal is attempted.

    With ``prompt`` set (and ``dry_run`` not set) every object must be
    confirmed; a declined object is counted as failed on ``Runner``.
    """
    matches = sorted(directory.glob(path_glob), reverse=True)
    ask = not dry_run and prompt

    # Pass 1: files and symlinks (a symlink to a directory counts as file).
    for candidate in matches:
        if candidate.is_dir() and not candidate.is_symlink():
            continue
        kind = 'symlink' if candidate.is_symlink() else 'file'
        if ask and not confirm('Delete %s %s' % (kind, candidate)):
            Runner.unlink_failed += 1
            continue
        Runner.unlink(candidate)

    # Pass 2: real directories. The type is checked only now, i.e. after
    # the first pass has already changed the file system.
    for candidate in matches:
        if not candidate.is_dir() or candidate.is_symlink():
            continue
        if ask and not confirm('Remove empty directory %s' % candidate):
            Runner.rmdir_failed += 1
            continue
        Runner.rmdir(candidate)
372
373
374
def confirm(message):
    """
    An interactive confirmation prompt.

    Args:
        message: Question text; ``'? '`` is appended for the prompt.

    Returns:
        ``True`` if the answer is 'y' or 'yes' (case-insensitive,
        surrounding whitespace ignored), ``False`` for any other answer.

    Raises:
        SystemExit: if the prompt is interrupted (Ctrl-C) or the input
            stream is at end-of-file (Ctrl-D, closed stdin).
    """
    try:
        answer = input('%s? ' % message)
    except (KeyboardInterrupt, EOFError):
        # End-of-file is an abort as well; without handling it here the
        # user would get an EOFError traceback instead of a clean exit.
        msg = 'Aborted by user.'
        raise SystemExit(msg) from None
    return answer.strip().lower() in ('y', 'yes')
382
383
384
def detect_debris_in_directory(directory):
    """
    Return the debris topics that have at least one match in ``directory``.

    Only the non-recursive patterns of ``DEBRIS_TOPICS`` (those without
    ``**``) are tried. Topics are reported in the order of
    ``DEBRIS_TOPICS``, each at most once.
    """
    found = []

    for topic, patterns in DEBRIS_TOPICS.items():
        shallow = (pattern for pattern in patterns if '**' not in pattern)
        # any() stops at the first pattern that yields a non-empty result.
        if any(list(directory.glob(pattern)) for pattern in shallow):
            found.append(topic)

    return found
402
403
404
def suggest_debris_option(args):
    """
    Log a hint that recommends the --debris option.

    Every existing directory in ``args.directory`` is scanned for known
    debris. If anything is found the hint lists the detected topics in
    sorted order, otherwise a general hint is logged.
    """
    found = set()
    for root in map(Path, args.directory):
        if root.exists():
            found.update(detect_debris_in_directory(root))

    if not found:
        # Nothing detected: fall back to the general suggestion.
        log.info(
            'Hint: Use --debris to also clean up build artifacts '
            'from common Python development tools.',
        )
        return

    # Targeted suggestion naming the detected topics.
    log.info(
        'Hint: Use --debris to also clean up build artifacts. Detected: %s',
        ' '.join(sorted(found)),
    )
430
431
432
def run_git_clean(directory, ignore_patterns, dry_run=False, force=False):
    """
    Run git clean in the specified directory with appropriate flags.

    Args:
        directory: Path to the directory to clean
        ignore_patterns: Patterns to exclude from git clean (may be
            empty or ``None``)
        dry_run: If True, only show what would be deleted
        force: If True, delete without prompting (otherwise interactive)

    Returns:
        Exit code from git clean, or 127 if the command could not be
        started.
    """
    cmd = ['git', 'clean', '-dx']

    # Add exclude patterns for ignored directories.
    for pattern in ignore_patterns or ():
        cmd.extend(['-e', pattern])

    # Mode flag: -n for dry-run, -f for force, -i for interactive.
    interactive = not dry_run and not force
    if dry_run:
        cmd.append('-n')
    elif force:
        cmd.append('-f')
    else:
        cmd.append('-i')

    log.debug('Running: %s in %s', ' '.join(cmd), directory)

    try:
        result = subprocess.run(
            cmd,
            cwd=directory,
            # In interactive mode git must write its prompts straight to
            # the terminal; capturing would hide them until git exits.
            capture_output=not interactive,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        log.error('Git command not found. Cannot execute git clean.')
        return 127

    # Relay captured output (both attributes are None when not captured).
    if result.stdout:
        print(result.stdout, end='')
    if result.stderr:
        print(result.stderr, end='')

    return result.returncode
482