Passed: push to master (ea7c4a...d61422) by Matěj
Duration: 01:18 (queued 12s)

find_duplicates.main()  (rating: B)

Complexity:     Conditions 6
Size:           Total Lines 58, Code Lines 39
Duplication:    Lines 0, Ratio 0 %
Code Coverage:  Tests 0, CRAP Score 42
Importance:     Changes 0

Metric   Value
cc       6
eloc     39
nop      0
dl       0
loc      58
ccs      0
cts      25
cp       0
crap     42
rs       8.0106
c        0
b        0
f        0
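
The CRAP Score above is consistent with the usual C.R.A.P. formula, crap = cc^2 * (1 - coverage)^3 + cc: with cc = 6 and 0 % coverage the value is 6^2 * 1 + 6 = 42. The snippet below only illustrates that arithmetic and is not part of the report:

def crap_score(complexity, coverage_pct):
    # C.R.A.P. = complexity^2 * (1 - coverage)^3 + complexity
    coverage = coverage_pct / 100.0
    return complexity ** 2 * (1 - coverage) ** 3 + complexity

print(crap_score(6, 0))  # 42.0, matching the CRAP Score reported above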

How to fix: Long Method

Small methods make your code easier to understand, particularly when combined with a good name. And when a method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a good sign that the commented part should be extracted into a new method; the comment is then a natural starting point for naming it.

Commonly applied refactorings include:

- Extract Method: split the long method into several smaller, well-named methods (a sketch of this applied to main() follows).
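
As an illustration only, one way to apply Extract Method to the flagged main() is to move each duplicate check into its own small function. The helper names below (check_static_bash, check_bash_templates) are made up for this sketch; the finder classes, parse_args() and the os/sys imports are the ones from the listing that follows, and the two OVAL checks would be extracted the same way.

def check_static_bash(root_dir):
    """Report duplicated static bash fixes; return True if any are found."""
    print("Static bash files:")
    return BashDuplicatesFinder(
        root_dir,
        os.path.join("**", "fixes", "bash"),
        os.path.join("shared", "fixes", "bash")
    ).search()


def check_bash_templates(root_dir):
    """Report duplicated bash templates; return True if any are found."""
    print("Bash templates:")
    return BashDuplicatesFinder(
        root_dir,
        os.path.join("**", "templates"),
        os.path.join("shared", "templates"),
        "template_BASH_*"
    ).search()


def main():
    args = parse_args()
    root_dir = args.root_ssg_directory

    # Build the list eagerly so every check runs and prints its matches,
    # exactly as the original sequential version does.
    results = [
        check_static_bash(root_dir),
        check_bash_templates(root_dir),
        # ... plus the two OVAL checks, extracted the same way
    ]

    if any(results):
        print("Duplicates found!")
        sys.exit(1)
    print("No duplicates found")
    sys.exit(0)

Each check now fits on one screen and carries its own name, which is what brings the per-method complexity and CRAP score down.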

#!/usr/bin/env python2
"""
    This script finds duplicates, e.g. a specific template that is identical to a shared one
"""
import sys
import os
import re
import glob
import argparse


def recursive_globi(mask):
    """
    Simple replacement of glob.iglob(mask, recursive=True)
    Reason: support for Python versions older than 3.5, which added the recursive argument
    """

    parts = mask.split("**/")

    if len(parts) != 2:
        raise NotImplementedError

    search_root = parts[0]

    # instead of '*' use regex '.*'
    path_mask = parts[1].replace("*", ".*")
    re_path_mask = re.compile(path_mask + "$")

    for root, dirnames, filenames in os.walk(search_root):
        paths = filenames + dirnames
        for path in paths:
            full_path = os.path.join(root, path)
            if re_path_mask.search(full_path):
                yield full_path


class DuplicatesFinder(object):
    def __init__(self, root_dir, specific_dirs_mask, shared_dir, shared_files_mask):
        self._root_dir = root_dir
        self._specific_dirs_mask = os.path.join(root_dir, specific_dirs_mask)
        self._shared_dir = os.path.join(root_dir, shared_dir)
        self._clear_normalized()
        self._shared_files_mask = shared_files_mask

    def _clear_normalized(self):
        self._normalized = {}

    def _get_normalized(self, file_path):
        """
        Return cached normalized content of a file
        :param file_path: path of the file to normalize
        :return: normalized content
        """
        if file_path in self._normalized:
            return self._normalized[file_path]

        with open(file_path, 'r') as content_file:
            content = content_file.read()
            normalized = self._normalize_content(content)
            self._normalized[file_path] = normalized
            return normalized

    def _compare_files(self, shared_filename, specific_filename):
        if not os.path.isfile(specific_filename):
            return False

        shared_normalized = self._get_normalized(shared_filename)
        specific_normalized = self._get_normalized(specific_filename)

        return shared_normalized == specific_normalized

    def _print_match(self, first_filename, second_filename):
        print("Duplicate found! {}\t=>\t{}".format(first_filename, second_filename))

    def search(self):
        """
        :return: True if any duplicate found
        """
        found = False
        self._clear_normalized()

        specific_dirs = list(self._specific_dirs())

        # Walk all shared files
        shared_files_mask = os.path.join(self._shared_dir, self._shared_files_mask)
        for shared_filename in glob.glob(shared_files_mask):

            basename = os.path.basename(shared_filename)

            # Walk all specific dirs
            for specific_dir in specific_dirs:

                # Get file to compare
                specific_filename = os.path.join(specific_dir, basename)

                # Compare
                if self._compare_files(shared_filename, specific_filename):
                    found = True
                    self._print_match(shared_filename, specific_filename)

        return found

    def _specific_dirs(self):
        for static_path in recursive_globi(self._specific_dirs_mask):
            if not static_path.startswith(self._shared_dir):
                yield static_path

    def _normalize_content(self, content):
        return content


class BashDuplicatesFinder(DuplicatesFinder):
    def __init__(self, root_dir, specific_dirs_mask, shared_dir, shared_files_mask="*.sh"):
        DuplicatesFinder.__init__(self, root_dir, specific_dirs_mask, shared_dir, shared_files_mask)

    def _normalize_content(self, content):
        # remove comments on every line (re.MULTILINE), naive implementation (todo)
        content = re.sub(r"^\s*#.*", "", content, flags=re.MULTILINE)

        # remove empty lines
        content = "\n".join([s for s in content.split("\n") if s])

        return content


class OvalDuplicatesFinder(DuplicatesFinder):
    def __init__(self, root_dir, specific_dirs_mask, shared_dir, shared_files_mask="*.xml"):
        DuplicatesFinder.__init__(self, root_dir, specific_dirs_mask, shared_dir, shared_files_mask)

    def _normalize_content(self, content):
        # remove comments, naive implementation (todo)
        # bash style comments - due to #platform
        content = re.sub(r"^\s*#.*", "", content, flags=re.MULTILINE)
        # xml comments
        content = re.sub('<!--.*?-->', "", content, flags=re.DOTALL)

        # remove empty lines
        content = "\n".join([s for s in content.split("\n") if s])

        return content


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("root_ssg_directory", help="Path to root of ssg git repository")
    return parser.parse_args()


def main():
    """
    Run all duplicate checks and exit with status 1 if any duplicate is found
    """
    args = parse_args()
    root_dir = args.root_ssg_directory
    without_duplicates = True

    # Static bash scripts
    print("Static bash files:")
    static_bash_finder = BashDuplicatesFinder(
        root_dir,
        os.path.join("**", "fixes", "bash"),
        os.path.join("shared", "fixes", "bash")
    )
    if static_bash_finder.search():
        without_duplicates = False

    # Templates bash scripts
    print("Bash templates:")
    template_bash_finder = BashDuplicatesFinder(
        root_dir,
        os.path.join("**", "templates"),
        os.path.join("shared", "templates"),
        "template_BASH_*"
    )
    if template_bash_finder.search():
        without_duplicates = False

    # Static oval files
    print("Static oval files:")
    static_oval_finder = OvalDuplicatesFinder(
        root_dir,
        os.path.join("**", "checks", "oval"),
        os.path.join("shared", "checks", "oval")
    )
    if static_oval_finder.search():
        without_duplicates = False

    # Templates oval files
    print("Templates oval files:")
    templates_oval_finder = OvalDuplicatesFinder(
        root_dir,
        os.path.join("**", "templates"),
        os.path.join("shared", "templates"),
        "template_OVAL_*"
    )
    if templates_oval_finder.search():
        without_duplicates = False

    # Scan results
    if without_duplicates:
        print("No duplicates found")
        sys.exit(0)
    else:
        print("Duplicates found!")
        sys.exit(1)


if __name__ == "__main__":
    main()
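
Because main() communicates its result through the exit code (0 when no duplicates are found, 1 otherwise), the checker is easy to call from another script or CI step. A minimal sketch, assuming the module above is saved as find_duplicates.py and the repository path is a placeholder:

import subprocess
import sys

# Exit code 0: no duplicates; exit code 1: duplicates were printed to stdout.
status = subprocess.call(
    [sys.executable, "find_duplicates.py", "/path/to/scap-security-guide"]
)
if status != 0:
    print("Duplicate templates or fixes detected; see the report above.")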