Completed
Push — master ( 5f68ae...c3dae1 )
by Bjorn
01:12
created

list_files()   F

Complexity

Conditions 16

Size

Total Lines 45

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 16
c 2
b 0
f 0
dl 0
loc 45
rs 2.7326

2 Methods

Rating   Name   Duplication   Size   Complexity  
B keep_file() 0 9 5
B clean_dirs() 0 7 5

How to fix   Complexity   

Complexity

Complex classes like list_files() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
import argparse
3
import os
4
from hashlib import md5
5
from .path import Path
6
7
# Name of the per-directory file that read_skipfile() looks for; each line
# in it is an entry that list_files() leaves out of its results.
SKIPFILE_NAME = '.skipfile'
8
9
10
def read_skipfile(dirname='.', defaults=None):
    """Return the entries that :func:`list_files` should skip.

    The .skipfile should contain one entry per line,
    listing files/directories that should be skipped by
    :func:`list_files`.

    :param dirname:  directory in which to look for the skip file.
    :param defaults: entries that are always included in the result; when
                     ``None`` a built-in list of file names is used.
    :returns: ``defaults`` followed by the lines of the skip file, or
              ``defaults`` alone when the skip file cannot be opened/read.
    """
    if defaults is None:
        defaults = ['Makefile', 'make.bat', 'atlassian-ide-plugin.xml']
    try:
        # The context manager closes the handle deterministically instead
        # of leaving it to the garbage collector.
        with open(os.path.join(dirname, SKIPFILE_NAME)) as fp:
            return defaults + fp.read().splitlines()
    except IOError:
        # A missing/unreadable skip file is not an error: use the defaults.
        return defaults
23
24
25
def list_files(dirname='.', curdir=".", relative=True):
    """Yield (digest, fname) tuples for all interesting files
       in `dirname`.

    ``digest`` is the hex MD5 digest of the file's bytes and ``fname`` is
    the file's path relative to `dirname`, using ``/`` as separator.

    A file is left out when its name or any component of its relative path
    starts with a dot, when its relative path is listed by
    :func:`read_skipfile`, or when its name ends with one of the skipped
    extensions.  Directories named in ``skipdirs`` or ending in
    ``.egg-info`` are not descended into.

    :param dirname:  root directory to walk (converted with ``str()``).
    :param curdir:   accepted for interface compatibility; not used by
                     this function.
    :param relative: accepted for interface compatibility; not used by
                     this function.
    """
    skipdirs = ('__pycache__', '.git', '.svn', 'htmlcov', 'dist', 'build',
                '.idea', 'tasks', 'static', 'media', 'data', 'migrations',
                '.doctrees', '_static', 'node_modules', 'external',
                'jobs', 'tryout', 'tmp', '_coverage',
                )
    # A tuple, so it can be handed to str.endswith() in one call.
    skipexts = ('.pyc', '~', '.svg', '.txt', '.TTF', '.tmp', '.errmail',
                '.email', '.bat', '.dll', '.exe', '.Dll', '.jpg', '.gif',
                '.png', '.ico', '.db', '.md5')
    dirname = str(dirname)
    skipfiles = read_skipfile(dirname)

    def clean_dirs(directories):
        # Prune in place via slice assignment: os.walk only honours changes
        # made to the list object it handed out, and rebuilding the list
        # avoids removing items from a list while iterating over it (which
        # skips the element following each removal).
        directories[:] = [
            d for d in directories
            if d not in skipdirs and not d.endswith('.egg-info')
        ]

    def keep_file(filename, filepath):
        if filename.startswith('.'):
            return False
        if filepath in skipfiles:
            return False
        return not filename.endswith(skipexts)

    for root, dirs, files in os.walk(os.path.abspath(dirname)):
        clean_dirs(dirs)
        for fname in files:
            relpth = os.path.relpath(
                os.path.join(root, fname), dirname
            ).replace('\\', '/')

            parts = Path(relpth).parts()
            if not keep_file(fname, relpth) or any(p.startswith('.') for p in parts):
                continue

            pth = os.path.join(dirname, relpth)
            # Hash the raw bytes: binary mode keeps the digest independent
            # of platform newline translation, and md5() needs bytes on
            # Python 3.  The context manager closes the file before yielding.
            with open(pth, 'rb') as fp:
                digest = md5(fp.read()).hexdigest()
            yield digest, relpth
70
71
72
def main():  # pragma: nocover
    """Command-line entry point.

    Parses an optional ``directory`` argument and a ``--verbose``/``-v``
    flag, then prints one ``<digest> <filename>`` line for every item
    yielded by :func:`list_files`.  When no directory is given the current
    working directory is used.
    """
    # The text belongs in ``description``; ``add_help`` is only the boolean
    # switch that controls whether -h/--help is added.
    p = argparse.ArgumentParser(description="Recursively list interesting files.")
    p.add_argument(
        'directory', nargs="?", default="",
        help="The directory to process (current dir if omitted)."
    )
    p.add_argument(
        '--verbose', '-v', action='store_true',
        help="Increase verbosity."
    )

    args = p.parse_args()
    args.curdir = os.getcwd()
    if not args.directory:
        args.directory = args.curdir
    if args.verbose:
        print(args)

    for chsm, fname in list_files(args.directory):
        # Explicit formatting gives the same output whether ``print`` is a
        # statement (Python 2) or a function (Python 3).
        print("%s %s" % (chsm, fname))
92
93
94
if __name__ == "__main__":  # pragma: nocover
    # Run the command-line interface only when executed as a script,
    # not when the module is imported.
    main()
96