|
1
|
|
|
# -*- coding: utf-8 -*-
|
2
|
|
|
import argparse |
|
3
|
|
|
import os |
|
4
|
|
|
from hashlib import md5 |
|
5
|
|
|
from .path import Path |
|
6
|
|
|
|
|
7
|
|
|
# Name of the per-directory file that read_skipfile() looks for; it lists
# additional entries to skip, one per line.
SKIPFILE_NAME = '.skipfile'
|
8
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
def read_skipfile(dirname='.', defaults=None):
    """Return the skip entries that apply to `dirname`.

    The .skipfile should contain one entry per line,
    listing files/directories that should be skipped by
    :func:`list_files`.

    :param dirname: directory in which to look for the skipfile
        (named by ``SKIPFILE_NAME``).
    :param defaults: entries that are always included; when ``None`` the
        built-in list ``Makefile``, ``make.bat`` and
        ``atlassian-ide-plugin.xml`` is used.
    :returns: `defaults` followed by the lines of the skipfile (line
        endings stripped), or `defaults` itself when the skipfile cannot
        be opened or read.
    """
    if defaults is None:
        defaults = ['Makefile', 'make.bat', 'atlassian-ide-plugin.xml']
    try:
        # Use a context manager so the handle is closed deterministically
        # instead of whenever the file object happens to be collected.
        with open(os.path.join(dirname, SKIPFILE_NAME)) as fp:
            entries = fp.read().splitlines()
    except IOError:
        # No readable skipfile: only the defaults apply.
        return defaults
    return defaults + entries
|
23
|
|
|
|
|
24
|
|
|
|
|
25
|
|
|
def list_files(dirname='.', curdir=".", relative=True):
    """Yield (digest, fname) tuples for all interesting files
    in `dirname`.

    A file is skipped when its name starts with a dot, when its relative
    path is listed by :func:`read_skipfile`, when its name ends with one
    of the extensions in ``skipexts``, or when any component of its
    relative path starts with a dot.  Directories named in ``skipdirs``
    and directories ending in ``.egg-info`` are not descended into.

    :param dirname: root directory to walk (converted with ``str()``).
    :param curdir: accepted for backward compatibility; not used by the
        current implementation.
    :param relative: accepted for backward compatibility; not used by the
        current implementation.
    :returns: generator of ``(md5 hex digest, path)`` tuples where the
        path is relative to `dirname` and uses ``/`` as separator.
    """
    skipdirs = ['__pycache__', '.git', '.svn', 'htmlcov', 'dist', 'build',
                '.idea', 'tasks', 'static', 'media', 'data', 'migrations',
                '.doctrees', '_static', 'node_modules', 'external',
                'jobs', 'tryout', 'tmp', '_coverage',
                ]
    # A tuple so it can be handed to str.endswith() in a single call.
    skipexts = ('.pyc', '~', '.svg', '.txt', '.TTF', '.tmp', '.errmail',
                '.email', '.bat', '.dll', '.exe', '.Dll', '.jpg', '.gif',
                '.png', '.ico', '.db', '.md5')
    dirname = str(dirname)
    skipfiles = read_skipfile(dirname)

    def clean_dirs(directories):
        """Prune `directories` in place so os.walk skips unwanted subtrees."""
        # Slice assignment keeps the same list object (os.walk only honours
        # in-place changes) and avoids removing items from the list while
        # iterating over it, which made the loop skip the entry following
        # each removed one.
        directories[:] = [
            d for d in directories
            if not d.endswith('.egg-info') and d not in skipdirs
        ]

    def keep_file(filename, filepath):
        """Return True unless the file is hidden, skip-listed or has a
        skipped extension."""
        if filename.startswith('.'):
            return False
        if filepath in skipfiles:
            return False
        return not filename.endswith(skipexts)

    for root, dirs, files in os.walk(os.path.abspath(dirname)):
        clean_dirs(dirs)
        for fname in files:
            # Normalise to forward slashes so results (and skipfile
            # comparisons) look the same on every platform.
            relpth = os.path.relpath(
                os.path.join(root, fname), dirname
            ).replace('\\', '/')

            parts = Path(relpth).parts()
            if not keep_file(fname, relpth):
                continue
            if any(p.startswith('.') for p in parts):
                continue

            pth = os.path.join(dirname, relpth)
            # Hash the raw bytes: text mode would translate newlines on
            # Windows and hands md5() a str on Python 3.  The context
            # manager closes each file as soon as it has been read.
            with open(pth, 'rb') as fp:
                digest = md5(fp.read()).hexdigest()
            yield digest, relpth
|
70
|
|
|
|
|
71
|
|
|
|
|
72
|
|
|
def main():  # pragma: nocover
    """Command-line entry point.

    Parses an optional ``directory`` argument and a ``--verbose``/``-v``
    flag, then prints one ``<md5 digest> <relative path>`` line for every
    file yielded by :func:`list_files`.  When no directory is given the
    current working directory is used.
    """
    # The text is the program description; ``add_help`` is only the flag
    # that controls whether -h/--help is added.
    p = argparse.ArgumentParser(
        description="Recursively list interesting files."
    )
    p.add_argument(
        'directory', nargs="?", default="",
        help="The directory to process (current dir if omitted)."
    )
    p.add_argument(
        '--verbose', '-v', action='store_true',
        help="Increase verbosity."
    )

    args = p.parse_args()
    args.curdir = os.getcwd()
    if not args.directory:
        # Was misspelled ``args.direcotry``, so the default never applied.
        args.directory = args.curdir
    if args.verbose:
        print(args)

    for chsm, fname in list_files(args.directory):
        # Single-argument print works as a statement (Python 2) and as a
        # function call (Python 3) with identical output.
        print('%s %s' % (chsm, fname))
|
92
|
|
|
|
|
93
|
|
|
|
|
94
|
|
|
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":  # pragma: nocover
    main()
|
96
|
|
|
|