1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
import argparse |
3
|
|
|
import os |
4
|
|
|
from hashlib import md5 |
5
|
|
|
from .path import Path |
6
|
|
|
|
7
|
|
|
# Name of the optional per-directory file whose lines list the paths that
# :func:`list_files` should skip (see :func:`read_skipfile`).
SKIPFILE_NAME = '.skipfile'
8
|
|
|
|
9
|
|
|
|
10
|
|
|
def read_skipfile(dirname='.', defaults=None):
    """Return the list of file/directory entries that should be skipped.

    The ``.skipfile`` in `dirname` should contain one entry per line,
    listing files/directories that should be skipped by
    :func:`list_files`.

    :param dirname: directory in which to look for the skip file.
    :param defaults: entries that are always skipped; when ``None`` a
        built-in list (``Makefile``, ``make.bat``,
        ``atlassian-ide-plugin.xml``) is used.
    :returns: `defaults` followed by the lines of the skip file, or just
        `defaults` if the skip file cannot be opened/read.
    """
    if defaults is None:
        defaults = ['Makefile', 'make.bat', 'atlassian-ide-plugin.xml']
    try:
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector.
        with open(os.path.join(dirname, SKIPFILE_NAME)) as fp:
            return defaults + fp.read().splitlines()
    except IOError:
        # A missing or unreadable skip file is not an error.
        return defaults
23
|
|
|
|
24
|
|
|
|
25
|
|
|
def list_files(dirname='.', curdir=".", relative=True):
    """Yield (digest, fname) tuples for all interesting files in `dirname`.

    The tree below `dirname` is walked recursively.  Directories named in
    the internal skip list (and any ``*.egg-info`` directory) are pruned,
    and files are dropped when they are hidden (any path component starts
    with a dot), are listed by :func:`read_skipfile`, or end with one of
    the skipped extensions.

    :param dirname: root directory to scan (converted with ``str()``).
    :param curdir: accepted for backward compatibility; not used by this
        implementation.
    :param relative: accepted for backward compatibility; not used by this
        implementation.
    :returns: generator of ``(md5 hexdigest, path)`` tuples, where ``path``
        is relative to `dirname` and uses ``/`` as separator.
    """
    skipdirs = ['__pycache__', '.git', '.svn', 'htmlcov', 'dist', 'build',
                '.idea', 'tasks', 'static', 'media', 'data', 'migrations',
                '.doctrees', '_static', 'node_modules', 'external',
                'jobs', 'tryout', 'tmp', '_coverage',
                ]
    # A tuple so it can be handed directly to ``str.endswith``.
    skipexts = ('.pyc', '~', '.svg', '.txt', '.TTF', '.tmp', '.errmail',
                '.email', '.bat', '.dll', '.exe', '.Dll', '.jpg', '.gif',
                '.png', '.ico', '.db', '.md5')
    dirname = str(dirname)
    skipfiles = set(read_skipfile(dirname))

    def clean_dirs(directories):
        # Prune in place (slice assignment) so os.walk does not descend into
        # the removed directories.  Building a new list avoids removing
        # items from the list while iterating over it, which would skip
        # the entry following each removed one.
        directories[:] = [
            d for d in directories
            if not d.endswith('.egg-info') and d not in skipdirs
        ]

    def keep_file(filename, filepath):
        if filename.startswith('.'):
            return False
        if filepath in skipfiles:
            return False
        return not filename.endswith(skipexts)

    for root, dirs, files in os.walk(os.path.abspath(dirname)):
        clean_dirs(dirs)
        for fname in files:
            relpth = os.path.relpath(
                os.path.join(root, fname), dirname
            ).replace('\\', '/')

            parts = Path(relpth).parts()
            if not keep_file(fname, relpth) or any(p.startswith('.') for p in parts):
                continue

            pth = os.path.join(dirname, relpth)
            # Hash the raw bytes: binary mode avoids newline translation and
            # is what md5() requires on Python 3; the context manager closes
            # the handle as soon as the file has been read.
            with open(pth, 'rb') as fp:
                digest = md5(fp.read()).hexdigest()
            yield digest, relpth
70
|
|
|
|
71
|
|
|
|
72
|
|
|
def main():  # pragma: nocover
    """Command line entry point.

    Parses an optional ``directory`` argument (defaulting to the current
    working directory) and a ``--verbose``/``-v`` flag, then prints one
    ``<digest> <filename>`` line for every file yielded by
    :func:`list_files`.
    """
    # The text is the program description; it was previously passed as
    # ``add_help`` by mistake and therefore never shown.
    p = argparse.ArgumentParser(description="Recursively list interesting files.")
    p.add_argument(
        'directory', nargs="?", default="",
        help="The directory to process (current dir if omitted)."
    )
    p.add_argument(
        '--verbose', '-v', action='store_true',
        help="Increase verbosity."
    )

    args = p.parse_args()
    args.curdir = os.getcwd()
    if not args.directory:
        # Fall back to the current directory (attribute name was misspelled
        # before, so the fallback never took effect).
        args.directory = args.curdir
    if args.verbose:
        print(args)

    for chsm, fname in list_files(args.directory):
        # Single-string form prints identically on Python 2 and Python 3.
        print("%s %s" % (chsm, fname))
92
|
|
|
|
93
|
|
|
|
94
|
|
|
# Run the command line interface when the module is executed as a script.
if __name__ == "__main__":  # pragma: nocover
    main()
96
|
|
|
|