Completed
Pull Request — master (#1625)
by Abdeali
01:38
created

coalib.processes.check_result_ignore()   B

Complexity

Conditions 5

Size

Total Lines 17

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 5
dl 0
loc 17
rs 8.5454
1
import multiprocessing
2
import os
3
import platform
4
import queue
5
import subprocess
6
from itertools import chain
7
8
from coalib.collecting import Dependencies
9
from coalib.collecting.Collectors import collect_files
10
from coalib.misc.StringConverter import StringConverter
11
from coalib.output.printers.LOG_LEVEL import LOG_LEVEL
12
from coalib.processes.BearRunning import run
13
from coalib.processes.CONTROL_ELEMENT import CONTROL_ELEMENT
14
from coalib.processes.LogPrinterThread import LogPrinterThread
15
from coalib.results.Result import Result
16
from coalib.results.result_actions.ApplyPatchAction import ApplyPatchAction
17
from coalib.results.result_actions.PrintDebugMessageAction import (
18
    PrintDebugMessageAction)
19
from coalib.results.result_actions.ShowPatchAction import ShowPatchAction
20
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
21
from coalib.results.SourceRange import SourceRange
22
from coalib.settings.Setting import path_list
23
24
# Result actions that can be selected as a default action for a bear (see
# get_default_actions, which looks them up by their metadata name).
ACTIONS = [
    ApplyPatchAction,
    PrintDebugMessageAction,
    ShowPatchAction,
]
27
28
29
def get_cpu_count():
    """
    Retrieves the number of CPUs reported by the multiprocessing module.

    :return: The result of ``multiprocessing.cpu_count()`` or 2 if that
             function is not implemented on this platform.
    """
    try:
        cpu_count = multiprocessing.cpu_count()
    # Some CPU architectures/OSes do not implement cpu_count
    except NotImplementedError:  # pragma: no cover
        cpu_count = 2

    return cpu_count
35
36
37
def fill_queue(queue_fill, any_list):
    """
    Puts every element of the given iterable into the given queue, keeping
    the iteration order.

    :param queue_fill: The queue that receives the elements.
    :param any_list:   Iterable providing the elements to enqueue.
    """
    for item in any_list:
        queue_fill.put(item)
46
47
48
def get_running_processes(processes):
    """
    Counts how many of the given processes are still alive.

    :param processes: Iterable of objects providing an ``is_alive()`` method.
    :return:          The number of objects for which ``is_alive()`` is true.
    """
    return sum(1 for process in processes if process.is_alive())
50
51
52
def create_process_group(command_array, **kwargs):
    """
    Spawns the given command via ``subprocess.Popen``. On Windows the
    ``CREATE_NEW_PROCESS_GROUP`` creation flag is passed, on every other
    platform ``os.setsid`` is invoked in the child before the command runs.

    :param command_array: The command, handed to ``subprocess.Popen`` as is.
    :param kwargs:        Further keyword arguments for ``subprocess.Popen``.
    :return:              The created ``subprocess.Popen`` object.
    """
    if platform.system() != "Windows":
        return subprocess.Popen(command_array,
                                preexec_fn=os.setsid,
                                **kwargs)

    return subprocess.Popen(  # pragma: no cover
        command_array,
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
        **kwargs)
63
64
65
def get_default_actions(section):
    """
    Parses the key `default_actions` in the given section.

    :param section:    The section where to parse from.
    :return:           A dict with the bearname as keys and their default
                       actions (classes taken from ``ACTIONS``) as values
                       and another dict that maps bearnames to the action
                       names that match none of the known actions. Both
                       dicts are empty if looking up `default_actions`
                       raises an ``IndexError``.
    """
    try:
        default_actions = dict(section["default_actions"])
    except IndexError:
        # NOTE(review): presumably the section signals a missing setting
        # with IndexError - confirm against the Section implementation.
        return {}, {}

    action_dict = {action.get_metadata().name: action for action in ACTIONS}

    # Split the configured entries into resolvable and unknown ones in a
    # single pass. This avoids subtracting a dict values view (which is not
    # set-like) from a keys view and deleting from the dict afterwards.
    actions = {}
    invalid_actions = {}
    for bearname, action_name in default_actions.items():
        if action_name in action_dict:
            actions[bearname] = action_dict[action_name]
        else:
            invalid_actions[bearname] = action_name

    return actions, invalid_actions
93
94
95
def autoapply_actions(results,
                      file_dict,
                      file_diff_dict,
                      section,
                      log_printer):
    """
    Applies the default actions configured in the given section to the
    results of the bears they were configured for.

    :param results:        A list of results.
    :param file_dict:      A dictionary containing the name of files and its
                           contents.
    :param file_diff_dict: A dictionary that contains filenames as keys and
                           diff objects as values.
    :param section:        The section.
    :param log_printer:    A log printer instance to log messages on.
    :return:               A list of the results no action was successfully
                           applied on. If no valid default action is
                           configured at all, the given results are returned
                           unchanged.
    """
    default_actions, invalid_actions = get_default_actions(section)

    for bearname, actionname in invalid_actions.items():
        message = ("Selected default action {} for bear {} does "
                   "not exist. Ignoring action.")
        log_printer.warn(message.format(repr(actionname), repr(bearname)))

    if not default_actions:
        # Without any default action there is nothing to apply.
        return results

    unprocessed = []
    for result in results:
        if result.origin not in default_actions:
            # No default action was configured for this bear.
            unprocessed.append(result)
            continue

        action = default_actions[result.origin]

        if not action.is_applicable(result, file_dict, file_diff_dict):
            message = ("Selected default action {} for bear {} is not "
                       "applicable. Action not applied.")
            log_printer.warn(message.format(repr(action.get_metadata().name),
                                            repr(result.origin)))
            unprocessed.append(result)
            continue

        try:
            action().apply_from_section(result,
                                        file_dict,
                                        file_diff_dict,
                                        section)
            log_printer.info("Applied {} on {} from {}.".format(
                repr(action.get_metadata().name),
                result.location_repr(),
                repr(result.origin)))
        except Exception as ex:
            # A failing action must not abort the run, the result is handed
            # back as unprocessed instead.
            unprocessed.append(result)
            log_printer.log_exception(
                "Failed to execute action {} with error: {}.".format(
                    repr(action.get_metadata().name),
                    ex),
                ex)
            log_printer.debug("-> for result " + repr(result) + ".")

    return unprocessed
160
161
162
def check_result_ignore(result, ignore_ranges):
    """
    Checks whether the given result is covered by one of the ignore ranges.

    :param result:        The result that needs to be checked.
    :param ignore_ranges: A list of tuples, each containing a list of lower
                          cased affected bearnames and a SourceRange to
                          ignore. If any of the bearname lists is empty, it
                          is considered an ignore range for all bears.
    :return:              True if the result has to be ignored, False
                          otherwise.
    """
    return any(
        (len(bears) == 0 or result.origin.lower() in bears) and
        result.overlaps(ignore_range)
        for bears, ignore_range in ignore_ranges)
179
180
181
def print_result(results,
                 file_dict,
                 retval,
                 print_results,
                 section,
                 log_printer,
                 file_diff_dict,
                 ignore_ranges):
    """
    Takes the results produced by each bear and gives them to the print_results
    method to present to the user.

    Results are dropped beforehand if they are not exactly of type Result,
    if their severity is below the `min_severity` setting of the section or
    if they are covered by one of the ignore ranges. Unless the `autoapply`
    setting evaluates to false, the configured default actions are applied
    before the remaining results are printed.

    :param results:        A list of results.
    :param file_dict:      A dictionary containing the name of files and its
                           contents.
    :param retval:         True if an earlier invocation already yielded
                           results. In that case the returned flag is True
                           no matter what this invocation yields.
    :param print_results:  A function that prints all given results appropriate
                           to the output medium. It is called with the log
                           printer, the section, the results, file_dict and
                           file_diff_dict.
    :param section:        The section; its settings `min_severity` and
                           `autoapply` are read.
    :param log_printer:    The log printer handed to the auto-applied
                           actions and to print_results.
    :param file_diff_dict: A dictionary that contains filenames as keys and
                           diff objects as values.
    :param ignore_ranges:  A list of tuples, each containing a list of lower
                           cased bearnames and a SourceRange. Results that
                           are matched by any of them will be ignored.
    :return:               A tuple containing a bool which is True if retval
                           was True or if any result passed the filtering,
                           and the list of results that were handed to
                           print_results.
    """
    min_severity_str = str(section.get('min_severity', 'INFO')).upper()
    # Fall back to the INFO severity *value* for unknown severity names.
    # Falling back to the string 'INFO' would make the severity comparison
    # below fail with a TypeError.
    min_severity = RESULT_SEVERITY.str_dict.get(min_severity_str,
                                                RESULT_SEVERITY.INFO)

    # NOTE(review): the exact type check also drops instances of Result
    # subclasses - confirm this is intended before replacing it with
    # isinstance.
    results = [result
               for result in results
               if type(result) is Result and
               result.severity >= min_severity and
               not check_result_ignore(result, ignore_ranges)]

    if bool(section.get('autoapply', 'true')):
        patched_results = autoapply_actions(results,
                                            file_dict,
                                            file_diff_dict,
                                            section,
                                            log_printer)
    else:
        patched_results = results

    print_results(log_printer,
                  section,
                  patched_results,
                  file_dict,
                  file_diff_dict)
    return retval or len(results) > 0, patched_results
232
233
234
def get_file_dict(filename_list, log_printer):
    """
    Loads the contents of the given files.

    Files that cannot be decoded as UTF-8 or cannot be read at all are left
    out; a warning is logged for each of them.

    :param filename_list: List of names of paths to files to get contents of.
    :param log_printer:   The logger which logs errors.
    :return:              A dictionary with the filenames as keys and a
                          tuple of the lines of the respective file as
                          values.
    """
    contents = {}

    for path in filename_list:
        try:
            with open(path, "r", encoding="utf-8") as handle:
                lines = handle.readlines()
                contents[path] = tuple(lines)
        except UnicodeDecodeError:
            log_printer.warn(
                "Failed to read file '{}'. It seems to contain "
                "non-unicode characters. Leaving it "
                "out.".format(path))
        except OSError as exception:  # pragma: no cover
            log_printer.log_exception(
                "Failed to read file '{}' because of "
                "an unknown error. Leaving it "
                "out.".format(path),
                exception,
                log_level=LOG_LEVEL.WARNING)

    return contents
260
261
262
def filter_raising_callables(it, exception, *args, **kwargs):
    """
    Calls every item of the given iterator with the given arguments and
    yields the return values. Items that raise the given exception(s) are
    skipped silently.

    :param it:        The iterator providing the callables.
    :param exception: The (tuple of) exception(s) to filter for.
    :param args:      Positional arguments to pass to the callable.
    :param kwargs:    Keyword arguments to pass to the callable.
    """
    for candidate in it:
        try:
            yield candidate(*args, **kwargs)
        except exception:
            continue
277
278
279
def instantiate_bears(section,
                      local_bear_list,
                      global_bear_list,
                      file_dict,
                      message_queue):
    """
    Creates an instance of every given bear class. Bears that raise a
    RuntimeError on instantiation are left out.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bear classes to instantiate.
    :param global_bear_list: List of global bear classes to instantiate.
    :param file_dict:        Dictionary containing filenames and their
                             contents.
    :param message_queue:    Queue responsible to maintain the messages
                             delivered by the bears.
    :return:                 The local and global bear instance lists.
    """
    # Local bears are constructed from section and message queue only.
    local_instances = list(filter_raising_callables(
        local_bear_list,
        RuntimeError,
        section,
        message_queue,
        timeout=0.1))

    # Global bears additionally receive the file dictionary.
    global_instances = list(filter_raising_callables(
        global_bear_list,
        RuntimeError,
        file_dict,
        section,
        message_queue,
        timeout=0.1))

    return local_instances, global_instances
314
315
316
def instantiate_processes(section,
                          local_bear_list,
                          global_bear_list,
                          job_count,
                          log_printer):
    """
    Collects the files of the section, sets up the queues and result
    dictionaries shared between the processes and creates the processes
    that will run the bears.

    The given bear lists are modified in place: the bear classes they
    contain are replaced by bear instances.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bears belonging to the section.
    :param global_bear_list: List of global bears belonging to the section.
    :param job_count:        Max number of processes to create.
    :param log_printer:      The log printer to warn to.
    :return:                 A tuple containing a list of (not yet started)
                             processes and the keyword arguments that are
                             passed to each of them.
    """
    files_setting = path_list(section.get('files', ""))
    ignored_setting = path_list(section.get('ignore', ""))
    limit_setting = path_list(section.get('limit_files', ""))
    filename_list = collect_files(files_setting,
                                  log_printer,
                                  ignored_file_paths=ignored_setting,
                                  limit_file_paths=limit_setting)
    file_dict = get_file_dict(filename_list, log_printer)

    manager = multiprocessing.Manager()
    global_bear_queue = multiprocessing.Queue()
    filename_queue = multiprocessing.Queue()
    local_result_dict = manager.dict()
    global_result_dict = manager.dict()
    message_queue = multiprocessing.Queue()
    control_queue = multiprocessing.Queue()

    bear_runner_args = dict(file_name_queue=filename_queue,
                            local_bear_list=local_bear_list,
                            global_bear_list=global_bear_list,
                            global_bear_queue=global_bear_queue,
                            file_dict=file_dict,
                            local_result_dict=local_result_dict,
                            global_result_dict=global_result_dict,
                            message_queue=message_queue,
                            control_queue=control_queue,
                            timeout=0.1)

    # Replace the list contents in place so that bear_runner_args (which
    # holds the very same list objects) sees the instances.
    local_instances, global_instances = instantiate_bears(section,
                                                          local_bear_list,
                                                          global_bear_list,
                                                          file_dict,
                                                          message_queue)
    local_bear_list[:] = local_instances
    global_bear_list[:] = global_instances

    fill_queue(filename_queue, file_dict.keys())
    # Global bears are referenced by their index in global_bear_list.
    fill_queue(global_bear_queue, range(len(global_bear_list)))

    processes = [multiprocessing.Process(target=run, kwargs=bear_runner_args)
                 for _ in range(job_count)]

    return processes, bear_runner_args
373
374
375
def get_ignore_scope(line, keyword):
    """
    Extracts the bears an ignore declaration in the given line applies to.

    :param line:    The line containing the ignore declaration.
    :param keyword: The keyword that was found. Everything after the rightmost
                    occurrence of it will be considered for the scope.
    :return:        A list of lower cased bearnames or an empty list (-> "all")
    """
    scope_start = line.rfind(keyword) + len(keyword)
    scope = line[scope_start:]

    if not scope.startswith("all"):
        return list(StringConverter(scope, list_delimiters=', '))

    return []
388
389
390
def yield_ignore_ranges(file_dict):
    """
    Yields tuples of affected bears and a SourceRange that shall be ignored for
    those.

    Every line is lower cased and searched for these markers:

    - "start ignoring <scope>" opens a range at that line.
    - "stop ignoring" closes the currently open range (if any) at that line
      and yields it. A "stop ignoring" without a preceding, still open
      "start ignoring" yields nothing.
    - "ignore <scope>" yields a range covering that line and the following
      one.

    A range that is opened but never closed is not yielded.

    :param file_dict: The file dictionary (filenames as keys, an iterable of
                      the lines of the file as values).
    """
    for filename, file in file_dict.items():
        start = None
        bears = []
        for line_number, line in enumerate(file, start=1):
            line = line.lower()
            if "start ignoring " in line:
                start = line_number
                bears = get_ignore_scope(line, "start ignoring ")
            elif "stop ignoring" in line:
                if start is not None:
                    yield (bears,
                           SourceRange.from_values(filename,
                                                   start,
                                                   end_line=line_number))
                    # The range is closed now. Without this reset a later
                    # stray "stop ignoring" would yield another range
                    # reaching back to the stale start line.
                    start = None
                    bears = []
            elif "ignore " in line:
                yield (get_ignore_scope(line, "ignore "),
                       SourceRange.from_values(filename,
                                               line_number,
                                               end_line=line_number+1))
416
417
418
def process_queues(processes,
                   control_queue,
                   local_result_dict,
                   global_result_dict,
                   file_dict,
                   print_results,
                   section,
                   log_printer):
    """
    Iterate the control queue and send the results received to the
    print_result method so that they can be presented to the user.

    Results of local bears are presented as soon as they are announced.
    Results of global bears that are announced before all local processing
    finished are buffered and presented afterwards.

    :param processes:          List of processes which can be used to run
                               Bears.
    :param control_queue:      Containing control elements that indicate
                               whether there is a result available and which
                               bear it belongs to.
    :param local_result_dict:  Dictionary containing results respective to
                               local bears. It is modified by the processes
                               i.e. results are added to it by multiple
                               processes. Presented entries are replaced by
                               the results print_result hands back.
    :param global_result_dict: Dictionary containing results respective to
                               global bears. It is modified by the processes
                               i.e. results are added to it by multiple
                               processes. Presented entries are replaced by
                               the results print_result hands back.
    :param file_dict:          Dictionary containing file contents with
                               filename as keys.
    :param print_results:      Prints all given results appropriate to the
                               output medium.
    :param section:            The section that is executed; passed on to
                               print_result.
    :param log_printer:        The log printer; passed on to print_result.
    :return:                   The flag reported by the last print_result
                               invocation, i.e. True if any results were
                               yielded. False if print_result was never
                               invoked.
    """
    file_diff_dict = {}
    retval = False
    # Number of processes working on local/global bears. They are count down
    # when the last queue element of that process is processed which may be
    # *after* the process has ended!
    local_processes = len(processes)
    global_processes = len(processes)
    global_result_buffer = []
    ignore_ranges = list(yield_ignore_ranges(file_dict))

    def present_results(result_dict, key):
        """
        Presents result_dict[key] via print_result, updates retval and
        stores the results handed back by print_result under the same key.
        """
        nonlocal retval
        retval, res = print_result(result_dict[key],
                                   file_dict,
                                   retval,
                                   print_results,
                                   section,
                                   log_printer,
                                   file_diff_dict,
                                   ignore_ranges)
        result_dict[key] = res

    # One process is the logger thread
    while local_processes > 1:
        try:
            control_elem, index = control_queue.get(timeout=0.1)

            if control_elem == CONTROL_ELEMENT.LOCAL_FINISHED:
                local_processes -= 1
            elif control_elem == CONTROL_ELEMENT.GLOBAL_FINISHED:
                global_processes -= 1
            elif control_elem == CONTROL_ELEMENT.LOCAL:
                assert local_processes != 0
                present_results(local_result_dict, index)
            else:
                assert control_elem == CONTROL_ELEMENT.GLOBAL
                # Global results are presented after the local ones.
                global_result_buffer.append(index)
        except queue.Empty:
            if get_running_processes(processes) < 2:  # pragma: no cover
                # Recover silently, those branches are only
                # nondeterministically covered.
                break

    # Flush global result buffer
    for elem in global_result_buffer:
        present_results(global_result_dict, elem)

    # One process is the logger thread
    while global_processes > 1:
        try:
            control_elem, index = control_queue.get(timeout=0.1)

            if control_elem == CONTROL_ELEMENT.GLOBAL:
                present_results(global_result_dict, index)
            else:
                assert control_elem == CONTROL_ELEMENT.GLOBAL_FINISHED
                global_processes -= 1
        except queue.Empty:
            if get_running_processes(processes) < 2:  # pragma: no cover
                # Recover silently, those branches are only
                # nondeterministically covered.
                break

    return retval
526
527
528
def simplify_section_result(section_result):
    """
    Flattens a section's result as returned by `execute_section` into a
    form that is easy to use in other functions.

    :param section_result: The result of a section which was executed.
    :return:               Tuple containing:
                            - bool - True if results were yielded
                            - bool - True if unfixed results were yielded
                            - list - Results from all bears (local and global)
    """
    yielded_results, local_results, global_results = section_result[:3]

    # Entries that are None carry no results and are skipped.
    all_results = [result
                   for value in chain(local_results.values(),
                                      global_results.values())
                   if value is not None
                   for result in value]

    return yielded_results, len(all_results) > 0, all_results
553
554
555
def execute_section(section,
                    global_bear_list,
                    local_bear_list,
                    print_results,
                    log_printer):
    """
    Runs the given bears on the given section.

    The execute_section method does the following things:
    1. Prepare a Process
      * Load files
      * Create queues
    2. Spawn up one or more Processes
    3. Output results from the Processes
    4. Join all processes

    The number of processes is taken from the `jobs` setting of the section
    and falls back to the CPU count if the setting is missing or is not a
    number.

    :param section:          The section to execute.
    :param global_bear_list: List of global bears belonging to the section.
    :param local_bear_list:  List of local bears belonging to the section.
    :param print_results:    Prints all given results appropriate to the
                             output medium.
    :param log_printer:      The log_printer to warn to.
    :return:                 Tuple containing a bool (True if results were
                             yielded, False otherwise), a Manager.dict
                             containing all local results(filenames are key)
                             and a Manager.dict containing all global bear
                             results (bear names are key) as well as the
                             file dictionary.
    """
    local_bear_list = Dependencies.resolve(local_bear_list)
    global_bear_list = Dependencies.resolve(global_bear_list)

    try:
        job_count = int(section['jobs'])
    except ValueError:
        log_printer.warn("Unable to convert setting 'jobs' into a number. "
                         "Falling back to CPU count.")
        job_count = get_cpu_count()
    except IndexError:
        job_count = get_cpu_count()

    processes, arg_dict = instantiate_processes(section,
                                                local_bear_list,
                                                global_bear_list,
                                                job_count,
                                                log_printer)

    # The logger thread is started and joined along with the processes that
    # run the bears.
    logger_thread = LogPrinterThread(arg_dict["message_queue"], log_printer)
    processes.append(logger_thread)

    for runner in processes:
        runner.start()

    try:
        yielded_results = process_queues(processes,
                                         arg_dict["control_queue"],
                                         arg_dict["local_result_dict"],
                                         arg_dict["global_result_dict"],
                                         arg_dict["file_dict"],
                                         print_results,
                                         section,
                                         log_printer)
        return (yielded_results,
                arg_dict["local_result_dict"],
                arg_dict["global_result_dict"],
                arg_dict["file_dict"])
    finally:
        # Tell the logger thread to stop, then wait for everything.
        logger_thread.running = False

        for runner in processes:
            runner.join()
627