1
|
|
|
#!/usr/bin/env python2.7 |
2
|
|
|
# Licensed to the StackStorm, Inc ('StackStorm') under one or more |
3
|
|
|
# contributor license agreements. See the NOTICE file distributed with |
4
|
|
|
# this work for additional information regarding copyright ownership. |
5
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0 |
6
|
|
|
# (the "License"); you may not use this file except in compliance with |
7
|
|
|
# the License. You may obtain a copy of the License at |
8
|
|
|
# |
9
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
10
|
|
|
# |
11
|
|
|
# Unless required by applicable law or agreed to in writing, software |
12
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
13
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14
|
|
|
# See the License for the specific language governing permissions and |
15
|
|
|
# limitations under the License. |
16
|
|
|
|
17
|
|
|
from __future__ import print_function |
18
|
|
|
import collections |
19
|
|
|
import fnmatch |
20
|
|
|
import os |
21
|
|
|
import re |
22
|
|
|
import sys |
23
|
|
|
|
24
|
|
|
from tabulate import tabulate |
25
|
|
|
import six |
26
|
|
|
|
27
|
|
|
# Share of log lines (in percent of a file's total lines) above which the file
# is reported. This is only the default; _parse_args() rebinds it when a value
# is given on the command line.
LOG_ALERT_PERCENT = 5

# Logger method names that count as a log statement.
EVILS = ['info', 'debug', 'warning', 'exception', 'error', 'audit']

# Names a logger object (or the logging module) is expected to be bound to.
LOG_VARS = [
    'LOG', 'Log', 'log',
    'LOGGER', 'Logger', 'logger',
    'logging', 'LOGGING',
]

# Per-file results, keyed by file path. No default_factory is supplied, so both
# containers behave like plain dicts (a missing key raises KeyError).
FILE_LOG_COUNT = collections.defaultdict()   # path -> {level: count}
FILE_LINE_COUNT = collections.defaultdict()  # path -> number of lines
51
|
|
|
|
52
|
|
|
|
53
|
|
|
def _parse_args(args):
    """Read the optional alert threshold from the command line.

    :param args: Argument list in ``sys.argv`` form; ``args[0]`` is ignored.
    :return: ``{'alert_percent': args[1]}`` (the raw, unconverted value) when
             a second element is present, otherwise an empty ``dict``.
    :raises ValueError: If ``args[1]`` is a string ``int()`` cannot parse.

    Side effect: rebinds the module-level ``LOG_ALERT_PERCENT`` to
    ``int(args[1])`` when a threshold is supplied.
    """
    global LOG_ALERT_PERCENT

    # Nothing beyond the program name: keep the default threshold.
    if len(args) <= 1:
        return {}

    raw_percent = args[1]
    parsed = {'alert_percent': raw_percent}
    LOG_ALERT_PERCENT = int(raw_percent)
    return parsed
60
|
|
|
|
61
|
|
|
|
62
|
|
|
def _skip_file(filename):
    """Tell whether a file should be left out of the scan.

    :param filename: Base name of the file (no directory part).
    :return: ``True`` when the name starts with ``.`` or ``_``, ``False``
             otherwise. The result is always a real ``bool``.
    """
    # str.startswith accepts a tuple of prefixes and returns a bool, so kept
    # files yield an explicit False instead of an implicit None.
    return filename.startswith(('.', '_'))
65
|
|
|
|
66
|
|
|
|
67
|
|
|
def _get_files(dir_path):
    """Collect the ``*.py`` files found below ``dir_path``.

    Directories named ``virtualenv``, ``build`` or ``.tox`` are not descended
    into, and files rejected by ``_skip_file`` are left out.

    :param dir_path: Root directory of the walk.
    :return: ``list`` of paths, each the walked directory joined with the
             file name.
    """
    if not os.path.exists(dir_path):
        # Only reported: the walk below still runs. os.walk ignores the
        # listing error by default, so the result is an empty list.
        print('Directory %s doesn\'t exist.' % dir_path)

    pruned = set(['virtualenv', 'build', '.tox'])
    found = []
    for root, dirnames, filenames in os.walk(dir_path):
        # Slice assignment mutates the list os.walk holds, which is what
        # stops it from descending into the pruned directories.
        dirnames[:] = [name for name in dirnames if name not in pruned]
        found.extend(
            os.path.join(root, name)
            for name in fnmatch.filter(filenames, '*.py')
            if not _skip_file(name)
        )
    return found
79
|
|
|
|
80
|
|
|
|
81
|
|
|
# TODO: Regex matching would presumably be faster than the string matchers,
# but it is not wired up yet (_regex_match is still a stub).
def _build_regex():
    """Compile one alternation pattern per log level.

    For every level in ``EVILS`` the pattern is the ``|``-joined list of
    ``<var>``, an escaped dot, and the level, for each ``<var>`` in
    ``LOG_VARS``.

    :return: ``dict`` mapping level name to its compiled pattern.
    """
    regexes = {}
    for level in EVILS:
        # Raw string: '\.' in an ordinary literal is an invalid escape
        # sequence and triggers a warning on newer interpreters.
        pattern = '|'.join(r'\.'.join([log, level]) for log in LOG_VARS)
        regexes[level] = re.compile(pattern)
    return regexes
91
|
|
|
|
92
|
|
|
|
93
|
|
|
def _regex_match(line, regexes):
    """Placeholder for regex-based matching; not implemented.

    :param line: Unused.
    :param regexes: Unused.
    :return: Always ``None``.
    """
    return None
95
|
|
|
|
96
|
|
|
|
97
|
|
|
def _build_str_matchers():
    """Build the literal prefixes used to recognise log statements.

    :return: ``dict`` mapping each level in ``EVILS`` to a ``list`` of
             ``'<var>.<level>'`` strings, one per entry of ``LOG_VARS`` and
             in the same order.
    """
    return {
        level: ['%s.%s' % (log_var, level) for log_var in LOG_VARS]
        for level in EVILS
    }
102
|
|
|
|
103
|
|
|
|
104
|
|
|
def _get_log_count_dict():
    """Return zeroed per-level counters as ``(level, 0)`` pairs.

    Despite the name, the result is a ``list`` of pairs in ``EVILS`` order;
    ``_detect_log_lines`` passes it to ``dict()``.

    :return: ``list`` of ``(level, 0)`` tuples.
    """
    return list(zip(EVILS, [0] * len(EVILS)))
106
|
|
|
|
107
|
|
|
|
108
|
|
|
def _alert(fil, lines, logs, logs_level):
    """Print a one-line warning about a file with too many log statements.

    :param fil: File path shown in the message.
    :param lines: Total number of lines in the file.
    :param logs: Number of log lines in the file.
    :param logs_level: Per-level breakdown; formatted with ``%s``.
    """
    # A file with no lines has no meaningful ratio: report 0 percent rather
    # than raising ZeroDivisionError.
    percent = float(logs) / lines * 100 if lines else 0.0
    print('WARNING: Too many logs!!!: File: %s, total lines: %d, log lines: %d, percent: %f, '
          'logs: %s' % (fil, lines, logs, percent, logs_level))
111
|
|
|
|
112
|
|
|
|
113
|
|
|
def _match(line, match_strings):
    """Classify a line by the log-call prefix it starts with.

    :param line: Source line to test; callers pass it already stripped.
    :param match_strings: ``dict`` mapping level name to an iterable of
                          prefixes such as ``'LOG.info'``.
    :return: ``(True, level, line)`` for the first level that has a prefix
             the line starts with, otherwise ``(False, 'UNKNOWN', line)``.
    """
    # The loop variable has its own name so the parameter is not rebound
    # while it is being iterated.
    for level, prefixes in match_strings.items():
        # startswith() takes a tuple and tests every prefix in one call.
        if line.startswith(tuple(prefixes)):
            return True, level, line
    return False, 'UNKNOWN', line
120
|
|
|
|
121
|
|
|
|
122
|
|
|
def _detect_log_lines(fil, matchers):
    """Count the lines and the per-level log statements of one file.

    Results are stored in the module-level ``FILE_LOG_COUNT`` and
    ``FILE_LINE_COUNT`` under the key ``fil``.

    :param fil: Path of the file to scan.
    :param matchers: Prefix table handed through to ``_match``.
    """
    # No ``global`` statement is needed: the module-level dicts are mutated,
    # never rebound.
    level_counts = dict(_get_log_count_dict())
    # Registered before the file is opened, so the entry exists even if
    # open() raises.
    FILE_LOG_COUNT[fil] = level_counts

    line_total = 0
    with open(fil) as f:
        # Stream the file rather than loading every line at once;
        # enumerate() leaves the number of lines in line_total.
        for line_total, raw_line in enumerate(f, 1):
            matched, level, _ = _match(raw_line.strip(), matchers)
            if matched:
                level_counts[level] += 1
    FILE_LINE_COUNT[fil] = line_total
138
|
|
|
|
139
|
|
|
|
140
|
|
|
def _post_process(file_dir):
    """Print a table of files whose share of log lines exceeds the threshold.

    Reads the module-level ``FILE_LINE_COUNT`` and ``FILE_LOG_COUNT`` and
    compares each file's percentage of log lines with ``LOG_ALERT_PERCENT``.
    Rows are sorted by percentage, highest first.

    :param file_dir: Directory that was scanned; stripped from the front of
                     the paths shown in the table.
    """
    # Normalise to exactly one trailing separator so that stripping the
    # prefix never drops a character of the relative path, even when
    # file_dir itself ends with a separator (e.g. '/').
    prefix = file_dir.rstrip(os.sep) + os.sep

    alerts = []
    for fil, lines in six.iteritems(FILE_LINE_COUNT):
        per_level = FILE_LOG_COUNT[fil]
        total_log_count = sum(six.itervalues(per_level))
        # Skipping files without log lines also keeps the division below
        # away from files that have zero lines.
        if total_log_count <= 0:
            continue

        percent = float(total_log_count) / lines * 100
        if percent <= LOG_ALERT_PERCENT:
            continue

        # Only strip file_dir when it really is the leading part of the path.
        shown = fil[len(prefix):] if fil.startswith(prefix) else fil
        alerts.append([shown, lines, total_log_count, percent,
                       per_level['audit'],
                       per_level['exception'],
                       per_level['error'],
                       per_level['warning'],
                       per_level['info'],
                       per_level['debug']])

    # sort by percent
    alerts.sort(key=lambda alert: alert[3], reverse=True)
    print(tabulate(alerts, headers=['File', 'Lines', 'Logs', 'Percent', 'adt', 'exc', 'err', 'wrn',
                                    'inf', 'dbg']))
162
|
|
|
|
163
|
|
|
|
164
|
|
|
def main(args):
    """Scan the Python files of a directory and print the log-density report.

    :param args: Argument list in ``sys.argv`` form, handed to
                 ``_parse_args``.

    The directory is taken from the ``'dir'`` entry of the parsed arguments,
    falling back to the current working directory. ``_parse_args`` as written
    never sets that entry, so the fallback is what is used.
    """
    options = _parse_args(args)
    root_dir = options.get('dir', os.getcwd())

    matchers = _build_str_matchers()
    for path in _get_files(root_dir):
        _detect_log_lines(path, matchers)

    _post_process(root_dir)


if __name__ == '__main__':
    main(sys.argv)
176
|
|
|
|
# Lint note: usage of `global` can make code hard to read and test; it is generally not recommended unless you are dealing with legacy code.