Passed
Pull Request — master (#3193)
by Alexander
02:07
created

ssg.rule_dir_stats._walk_rule()   C

Complexity

Conditions 9

Size

Total Lines 28
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 90

Importance

Changes 0
Metric Value
cc 9
eloc 17
nop 5
dl 0
loc 28
ccs 0
cts 17
cp 0
crap 90
rs 6.6666
c 0
b 0
f 0
1
"""
2
This module contains common code shared by utils/rule_dir_stats.py and
3
utils/rule_dir_diff.py. This code includes functions for walking the output
4
of the utils/rule_dir_json.py script, and filtering functions used in both
5
scripts.
6
"""
7
8
from __future__ import absolute_import
9
from __future__ import print_function
10
11
import os
12
from collections import defaultdict
13
14
from .build_remediations import REMEDIATION_TO_EXT_MAP as REMEDIATION_MAP
15
from .utils import subset_dict
16
17
18
def get_affected_products(rule_obj):
    """
    Return the products a rule declares, as a set.

    Reads rule_obj['products'] (the products taken from rule.yml) and
    collapses it into a new set, so duplicate entries are dropped and the
    caller is free to mutate the result.
    """
    declared_products = rule_obj['products']
    return set(declared_products)
23
24
25
def get_all_affected_products(args, rule_obj):
    """
    Return the set of products a rule affects, optionally widened by the
    products of its OVAL checks and remediations.

    With args.strict set, the result is exactly get_affected_products().
    Otherwise rule_obj['oval_products'] is merged in unless args.fixes_only
    is set, and rule_obj['remediation_products'] is merged in unless
    args.ovals_only is set.
    """

    products = get_affected_products(rule_obj)

    if not args.strict:
        if not args.fixes_only:
            products.update(rule_obj['oval_products'])
        if not args.ovals_only:
            products.update(rule_obj['remediation_products'])

    return products
49
50
51
def _walk_rule(args, rule_obj, oval_func, remediation_func, verbose_output):
    """
    Visit one rule, recording any callback results in verbose_output, and
    report whether the rule was visited.

    A rule is skipped (returns False) when none of its affected products is
    in args.products, or when args.query is non-empty and does not contain
    the rule id. Otherwise oval_func(rule_obj) is called unless
    args.fixes_only is set, and remediation_func(rule_obj, r_type) is called
    for each key of REMEDIATION_MAP unless args.ovals_only is set. Only
    truthy results are stored, under verbose_output[rule_id].

    Internal function for walk_rules and walk_rules_parallel.
    """

    rule_id = rule_obj['id']

    products = get_all_affected_products(args, rule_obj)
    if products.isdisjoint(args.products):
        return False

    selected_ids = args.query
    if selected_ids and rule_id not in selected_ids:
        return False

    if not args.fixes_only:
        oval_result = oval_func(rule_obj)
        if oval_result:
            verbose_output[rule_id]['oval'] = oval_result

    if not args.ovals_only:
        for r_type in REMEDIATION_MAP:
            fix_result = remediation_func(rule_obj, r_type)
            if fix_result:
                verbose_output[rule_id][r_type] = fix_result

    return True
79
80
def walk_rules(args, known_rules, oval_func, remediation_func):
    """
    Walk a dictionary of known_rules, returning the number of visited rules
    and the output collected at each visited rule.

    Each value of known_rules is a rule_obj and is handed to _walk_rule,
    which decides whether the rule is visited and conditionally calls
    oval_func and remediation_func based on the values of args.fixes_only
    and args.ovals_only.

    The callbacks are invoked as:

        oval_func(rule_obj)
        remediation_func(rule_obj, r_type)

    where r_type is each key of REMEDIATION_MAP in turn. args is not passed
    to either callback.

    The output structure is a nested defaultdict shaped as follows:

    {
        rule_id: {
            "oval": <truthy result of oval_func>,
            r_type: <truthy result of remediation_func for r_type>,
            ...
        },
        ...
    }

    Only truthy results are stored: a key is absent when its callback
    returned a falsy value or was not called, and a rule with no stored
    result has no entry at all even though it counts as visited.
    """

    verbose_output = defaultdict(lambda: defaultdict(lambda: None))

    affected_rules = 0
    for rule_obj in known_rules.values():
        if _walk_rule(args, rule_obj, oval_func, remediation_func, verbose_output):
            affected_rules += 1

    return affected_rules, verbose_output
116
117
118
def walk_rule_stats(rule_output):
    """
    Summarise the recorded output of a single rule.

    Returns a tuple of (affected_ovals, affected_remediations,
    all_affected_remediations, affected_remediations_type, all_output):

    - affected_ovals is 1 when rule_output has an 'oval' key, else 0.
    - affected_remediations is 1 when at least one key of REMEDIATION_MAP
      is present in rule_output, else 0.
    - all_affected_remediations is 1 when every key of REMEDIATION_MAP is
      present, else 0.
    - affected_remediations_type maps each present remediation type to 1.
    - all_output lists the stored values in a stable order: the oval value
      first, then the remediation values sorted by type.
    """

    all_output = []

    has_oval = 'oval' in rule_output
    if has_oval:
        all_output.append(rule_output['oval'])

    remediation_types = sorted(REMEDIATION_MAP)
    present_types = [r_type for r_type in remediation_types if r_type in rule_output]

    affected_remediations_type = defaultdict(lambda: 0)
    for r_type in present_types:
        affected_remediations_type[r_type] += 1
        all_output.append(rule_output[r_type])

    affected_ovals = 1 if has_oval else 0
    affected_remediations = 1 if present_types else 0
    # Nothing was filtered out exactly when every remediation type is present.
    covers_all_types = len(present_types) == len(remediation_types)
    all_affected_remediations = 1 if covers_all_types else 0

    return (affected_ovals, affected_remediations, all_affected_remediations,
            affected_remediations_type, all_output)
155
156
157
def walk_rules_stats(args, known_rules, oval_func, remediation_func):
    """
    Walk a dictionary of known_rules and generate simple aggregate statistics
    for all visited rules. The oval_func and remediation_func arguments behave
    according to walk_rules().

    Returns a six-tuple of:

    - affected_rules: the number of visited rules, as counted by walk_rules,
    - affected_ovals: the number of rules with oval output,
    - affected_remediations: the number of rules with output for at least
      one remediation type,
    - all_affected_remediations: the number of rules with output for every
      remediation type,
    - affected_remediations_type: a dictionary mapping each remediation type
      to the number of rules with output for that type,
    - all_output: the ordered output of all functions.

    An effort is made to provide consistently ordered output by sorting all
    visited rule ids and, within a rule, the remediation types (the keys of
    ssg.build_remediations.REMEDIATION_TO_EXT_MAP).
    """

    affected_rules, verbose_output = walk_rules(args, known_rules, oval_func, remediation_func)

    affected_ovals = 0
    affected_remediations = 0
    all_affected_remediations = 0
    affected_remediations_type = defaultdict(lambda: 0)
    all_output = []

    for rule_id in sorted(verbose_output):
        (rule_ovals, rule_remediations, rule_all_remediations,
         rule_types, rule_lines) = walk_rule_stats(verbose_output[rule_id])

        affected_ovals += rule_ovals
        affected_remediations += rule_remediations
        all_affected_remediations += rule_all_remediations
        for r_type, count in rule_types.items():
            affected_remediations_type[r_type] += count

        all_output.extend(rule_lines)

    return (affected_rules, affected_ovals, affected_remediations,
            all_affected_remediations, affected_remediations_type, all_output)
193
194
195
def walk_rules_parallel(args, left_rules, right_rules, oval_func, remediation_func):
    """
    Walk two dictionaries of rules with identical keys and split the
    _walk_rule results into left_only, right_only and common_only.

    Each returned element is a (visited rule count, verbose output) pair.
    A rule lands in common_only when both sides hold an equal rule object,
    or when walking both sides gives the same visited state and the same
    output. Otherwise its left result goes to left_only and its right
    result to right_only.

    Can assert: left_rules and right_rules must have the same set of keys.
    """

    def new_output():
        return defaultdict(lambda: defaultdict(lambda: None))

    counts = {'left': 0, 'right': 0, 'common': 0}
    outputs = {'left': new_output(), 'right': new_output(), 'common': new_output()}

    assert set(left_rules) == set(right_rules)

    for rule_id, left_rule_obj in left_rules.items():
        right_rule_obj = right_rules[rule_id]

        if left_rule_obj == right_rule_obj:
            # Equal rule objects are walked once, straight into the common output.
            if _walk_rule(args, left_rule_obj, oval_func, remediation_func, outputs['common']):
                counts['common'] += 1
            continue

        # Walk each side into scratch output so the results can be compared
        # before deciding where they belong.
        left_temp = new_output()
        right_temp = new_output()
        left_ret = _walk_rule(args, left_rule_obj, oval_func, remediation_func, left_temp)
        right_ret = _walk_rule(args, right_rule_obj, oval_func, remediation_func, right_temp)

        if left_ret == right_ret and left_temp == right_temp:
            outputs['common'].update(left_temp)
            if left_ret:
                counts['common'] += 1
            continue

        outputs['left'].update(left_temp)
        outputs['right'].update(right_temp)
        if left_ret:
            counts['left'] += 1
        if right_ret:
            counts['right'] += 1

    return tuple((counts[side], outputs[side]) for side in ('left', 'right', 'common'))
248
249
250
def walk_rules_diff(args, left_rules, right_rules, oval_func, remediation_func):
    """
    Walk two dictionaries of known rules (left_rules and right_rules) and
    generate five sets of output as a five-tuple: left-only rules output,
    right-only rules output, shared left output, shared right output, and
    shared common output. Each tuple element has the same shape as the
    result of walk_rules on the appropriate set of rules.

    Rules are matched purely by rule id. Renaming of rule_ids is not
    understood, as this would depend on disk content to reflect these
    differences. Unless significantly more data is added to the rule_obj
    structure (contents of rule.yml, ovals, remediations, etc.), all
    information besides 'title' is not uniquely identifying or could be
    easily updated.
    """

    left_ids = set(left_rules)
    right_ids = set(right_rules)
    shared_ids = left_ids & right_ids

    left_restricted = subset_dict(left_rules, left_ids - right_ids)
    left_common = subset_dict(left_rules, shared_ids)
    right_restricted = subset_dict(right_rules, right_ids - left_ids)
    right_common = subset_dict(right_rules, shared_ids)

    left_only_data = walk_rules(args, left_restricted, oval_func, remediation_func)
    right_only_data = walk_rules(args, right_restricted, oval_func, remediation_func)
    left_changed_data, right_changed_data, common_data = walk_rules_parallel(
        args, left_common, right_common, oval_func, remediation_func)

    return (left_only_data, right_only_data, left_changed_data, right_changed_data, common_data)
287
288
289
def walk_rules_diff_stats(results):
    """
    Turn the results of walk_rules_diff into five sets of statistics.

    results must hold five (affected_rules, verbose_output) pairs: left-only,
    right-only, shared left, shared right and shared common. The return
    value is a five-tuple in the same order, where each element has the
    shape returned by walk_rules_stats: (affected_rules, affected_ovals,
    affected_remediations, all_affected_remediations,
    affected_remediations_type, all_output).

    Can assert.
    """

    assert len(results) == 5

    output_data = []

    for affected_rules, verbose_output in results:
        affected_ovals = 0
        affected_remediations = 0
        all_affected_remediations = 0
        affected_remediations_type = defaultdict(lambda: 0)
        all_output = []

        # Sorted rule ids keep the collected output in a stable order.
        for rule_id in sorted(verbose_output):
            rule_stats = walk_rule_stats(verbose_output[rule_id])

            affected_ovals += rule_stats[0]
            affected_remediations += rule_stats[1]
            all_affected_remediations += rule_stats[2]
            for r_type, count in rule_stats[3].items():
                affected_remediations_type[r_type] += count

            all_output.extend(rule_stats[4])

        output_data.append((affected_rules, affected_ovals,
                            affected_remediations, all_affected_remediations,
                            affected_remediations_type, all_output))

    assert len(output_data) == 5

    return tuple(output_data)
332
333
334
def filter_rule_ids(all_keys, queries):
    """
    From a set of queries (a comma separated list of queries, where a query
    is either a rule id or a substring thereof), return the set of matching
    keys from all_keys. When queries is the literal string "all", return all
    of the keys. When queries is empty or None, return an empty set.

    Whitespace around each query is ignored, and empty queries (as produced
    by a trailing or doubled comma) are dropped: an empty string is a
    substring of every key, so keeping it would silently select every rule.
    """

    if not queries:
        return set()

    if queries == 'all':
        return set(all_keys)

    stripped_parts = (part.strip() for part in queries.split(','))
    query_parts = [part for part in stripped_parts if part]

    # Every key has to be inspected because a query matches as a substring of
    # a key, not only as a whole key. Collecting into a set deliberately
    # discards the order of the queries and guarantees unique results.
    return set(
        key for key in all_keys
        if any(query in key for query in query_parts)
    )
365
366
367
def missing_oval(rule_obj):
    """
    Report a rule that has no OVAL at all.

    Returns a message when rule_obj['ovals'] is empty, otherwise None.
    """

    rule_id = rule_obj['id']
    if len(rule_obj['ovals']) > 0:
        return None

    return "\trule_id:%s is missing all OVALs" % rule_id
376
377
378
def missing_remediation(rule_obj, r_type):
    """
    Report a rule that has no remediation of type r_type.

    Returns a message when rule_obj['remediations'][r_type] is empty,
    otherwise None.
    """

    rule_id = rule_obj['id']
    if len(rule_obj['remediations'][r_type]) > 0:
        return None

    return "\trule_id:%s is missing %s remediations" % (rule_id, r_type)
387
388
389
def two_plus_oval(rule_obj):
    """
    Report a rule that has two or more OVALs.

    Returns a message listing the comma-joined OVAL names when
    rule_obj['ovals'] holds at least two entries, otherwise None.
    """

    rule_id = rule_obj['id']
    ovals = rule_obj['ovals']
    if len(ovals) < 2:
        return None

    return "\trule_id:%s has two or more OVALs: %s" % (rule_id, ','.join(ovals))
398
399
400
def two_plus_remediation(rule_obj, r_type):
    """
    Report a rule that has two or more remediations of type r_type.

    Returns a message listing the comma-joined remediation names when
    rule_obj['remediations'][r_type] holds at least two entries, otherwise
    None.
    """

    rule_id = rule_obj['id']
    remediations = rule_obj['remediations'][r_type]
    if len(remediations) < 2:
        return None

    return "\trule_id:%s has two or more %s remediations: %s" % \
           (rule_id, r_type, ','.join(remediations))
410
411
412
def prodtypes_oval(rule_obj):
    """
    Report a rule whose products differ between the YAML and its OVALs.

    The products of all OVALs are pooled and compared with
    rule_obj['products']. Returns a message listing the sorted products that
    appear on only one side, or None when the two sets agree or when either
    side has no products at all.
    """

    rule_id = rule_obj['id']

    rule_products = set(rule_obj['products'])
    if not rule_products:
        return None

    oval_products = set()
    for oval in rule_obj['ovals'].values():
        oval_products.update(oval['products'])
    if not oval_products:
        return None

    mismatched = sorted(rule_products ^ oval_products)
    if not mismatched:
        return None

    return "\trule_id:%s has a different prodtypes between YAML and OVALs: %s" % \
           (rule_id, ','.join(mismatched))
435
436
437
def prodtypes_remediation(rule_obj, r_type):
    """
    Report a rule whose products differ between the YAML and its
    remediations of type r_type.

    The products of all r_type remediations are pooled and compared with
    rule_obj['products']. Returns a message listing the sorted products that
    appear on only one side, or None when the two sets agree or when either
    side has no products at all.
    """

    rule_id = rule_obj['id']

    rule_products = set(rule_obj['products'])
    if not rule_products:
        return None

    remediation_products = set()
    for remediation in rule_obj['remediations'][r_type].values():
        remediation_products.update(remediation['products'])
    if not remediation_products:
        return None

    # Both sets are known to be non-empty here, so only the difference matters.
    mismatched = sorted(rule_products ^ remediation_products)
    if not mismatched:
        return None

    return "\trule_id:%s has a different prodtypes between YAML and %s remediations: %s" % \
           (rule_id, r_type, ','.join(mismatched))
460
461
462
def product_names_oval(rule_obj):
    """
    Report an OVAL whose file name does not match a product it lists.

    Every OVAL except "shared.xml" is expected to be named after its single
    product: the file name without extension is compared with each of the
    OVAL's products. Returns a message for the first mismatch found, or None
    when there is none.
    """

    rule_id = rule_obj['id']
    for oval_name, oval in rule_obj['ovals'].items():
        if oval_name == "shared.xml":
            continue

        oval_product = os.path.splitext(oval_name)[0]
        for product in oval['products']:
            if product == oval_product:
                continue
            return "\trule_id:%s has a different product and OVALs names: %s is not %s" % \
                   (rule_id, product, oval_product)
478
479
480
def product_names_remediation(rule_obj, r_type):
    """
    Report a remediation of type r_type whose file name does not match a
    product it lists.

    Every remediation whose name without extension is not "shared" is
    expected to be named after its single product: that base name is
    compared with each of the remediation's products. Returns a message for
    the first mismatch found, or None when there is none.
    """

    rule_id = rule_obj['id']
    for r_name, remediation in rule_obj['remediations'][r_type].items():
        r_product = os.path.splitext(r_name)[0]
        if r_product == "shared":
            continue

        for product in remediation['products']:
            if product == r_product:
                continue
            return ("\trule_id:%s has a different product and %s remediation names: "
                    "%s is not %s" % (rule_id, r_type, product, r_product))
496