clean_dirs()   B
last analyzed

Complexity

Conditions 5

Size

Total Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 5
c 1
b 0
f 0
dl 0
loc 9
rs 8.5454
1
# -*- coding: utf-8 -*-
2
"""List interesting files.
3
"""
4
from __future__ import print_function
5
import argparse
6
import os
7
from hashlib import md5
8
from .path import Path
9
10
SKIPFILE_NAME = '.skipfile'


def read_skipfile(dirname='.', defaults=None):
    """Return the names that :func:`list_files` should skip.

    The skipfile (named by ``SKIPFILE_NAME`` and looked up directly
    inside `dirname`) should contain one entry per line, listing
    files/directories that should be skipped by :func:`list_files`.

    :param dirname: directory in which to look for the skipfile.
    :param defaults: entries that are always part of the result; when
        ``None`` a built-in list of build/IDE files is used.
    :returns: a new list with `defaults` followed by the lines of the
        skipfile, or `defaults` itself when the skipfile cannot be
        opened or read.
    """
    if defaults is None:
        defaults = ['Makefile', 'make.bat', 'atlassian-ide-plugin.xml']
    try:
        # Use a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(os.path.join(dirname, SKIPFILE_NAME)) as fp:
            return defaults + fp.read().splitlines()
    except IOError:
        # A missing or unreadable skipfile is not an error: only the
        # defaults apply.
        return defaults
26
27
28
def list_files(dirname='.', digest=True):
    """Yield all interesting files found below `dirname`.

    Directories named in ``skipdirs`` or ending in ``.egg-info`` are not
    descended into. Files are dropped when their name starts with a dot,
    when their relative path is listed by :func:`read_skipfile`, when
    their name ends with one of ``skipexts``, or when any component of
    their relative path starts with a dot.

    :param dirname: root directory to walk (converted with ``str``).
    :param digest: if true, yield ``(md5_hexdigest, relative_path)``
        tuples; otherwise yield only the relative path.
    :returns: a generator; relative paths always use ``/`` as separator.
    """
    skipdirs = {
        '__pycache__', '.git', '.svn', 'htmlcov', 'dist', 'build',
        '.idea', 'tasks', 'static', 'media', 'data', 'migrations',
        '.doctrees', '_static', 'node_modules', 'external',
        'jobs', 'tryout', 'tmp', '_coverage',
    }
    # A tuple so that a single ``str.endswith`` call can test them all.
    skipexts = ('.pyc', '~', '.svg', '.txt', '.TTF', '.tmp', '.errmail',
                '.email', '.bat', '.dll', '.exe', '.Dll', '.jpg', '.gif',
                '.png', '.ico', '.db', '.md5')
    dirname = str(dirname)
    skipfiles = set(read_skipfile(dirname))

    def clean_dirs(directories):
        """Remove, in place, the directories that should be skipped.
        """
        # Build the filtered list first and assign it back through a
        # slice: calling ``remove`` while iterating the same list skips
        # the element that follows each removal, and ``os.walk`` only
        # honours changes made to the list object it handed out.
        directories[:] = [
            d for d in directories
            if d not in skipdirs and not d.endswith('.egg-info')
        ]

    def keep_file(filename, filepath):
        """Returns False if the file should be skipped.
        """
        if filename.startswith('.'):
            return False
        if filepath in skipfiles:
            return False
        return not filename.endswith(skipexts)

    for root, dirs, files in os.walk(os.path.abspath(dirname)):
        clean_dirs(dirs)
        for fname in files:
            relpth = os.path.relpath(
                os.path.join(root, fname),
                dirname
            ).replace('\\', '/')

            # NOTE(review): Path.parts() is defined in the project's
            # ``path`` module; presumably it returns the path components
            # -- confirm there.
            parts = Path(relpth).parts()
            if not keep_file(fname, relpth) or \
                    any(p.startswith('.') for p in parts):
                continue

            if not digest:
                yield relpth
                continue

            pth = os.path.join(dirname, relpth)
            # Read inside a ``with`` block so the handle is closed before
            # the generator is suspended at the ``yield``.
            with open(pth, 'rb') as fp:
                checksum = md5(fp.read()).hexdigest()
            yield checksum, relpth
83
84
85
def main():  # pragma: nocover
    """Print checksum and file name for all files in the directory.

    The directory is taken from the optional positional command line
    argument and defaults to the current working directory. With
    ``--verbose``/``-v`` the parsed arguments are printed first.
    """
    # The text is the parser's description; ``add_help`` is only a flag
    # that controls whether ``-h/--help`` is added.
    p = argparse.ArgumentParser(
        description="Recursively list interesting files."
    )
    p.add_argument(
        'directory', nargs="?", default="",
        help="The directory to process (current dir if omitted)."
    )
    p.add_argument(
        '--verbose', '-v', action='store_true',
        help="Increase verbosity."
    )

    args = p.parse_args()
    args.curdir = os.getcwd()
    if not args.directory:
        args.directory = args.curdir
    if args.verbose:
        print(args)

    for chsm, fname in list_files(args.directory):
        print(chsm, fname)


if __name__ == "__main__":  # pragma: nocover
    main()
111