yield_ignore_ranges()   F
last analyzed

Complexity

Conditions 10

Size

Total Lines 44

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 10
dl 0
loc 44
rs 3.1304
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex functions like yield_ignore_ranges() often do a lot of different things. To break such a function down, we need to identify cohesive groups of statements within it. A common approach to finding such a group is to look for statements that operate on the same data, or that share the same prefixes or suffixes in the names they use.

Once you have determined the statements that belong together, you can apply the Extract Method/Function refactoring to move each cohesive group into its own well-named helper.

1
import multiprocessing
2
import os
3
import platform
4
import queue
5
import subprocess
6
from itertools import chain
7
8
from coalib.collecting import Dependencies
0 ignored issues
show
Unused Code introduced by
Unused Dependencies imported from coalib.collecting
Loading history...
9
from coalib.collecting.Collectors import collect_files
10
from coala_utils.string_processing.StringConverter import StringConverter
11
from coalib.output.printers.LOG_LEVEL import LOG_LEVEL
12
from coalib.processes.BearRunning import run
13
from coalib.processes.CONTROL_ELEMENT import CONTROL_ELEMENT
14
from coalib.processes.LogPrinterThread import LogPrinterThread
15
from coalib.results.Result import Result
16
from coalib.results.result_actions.ApplyPatchAction import ApplyPatchAction
17
from coalib.results.result_actions.PrintDebugMessageAction import (
18
    PrintDebugMessageAction)
19
from coalib.results.result_actions.ShowPatchAction import ShowPatchAction
20
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
21
from coalib.results.SourceRange import SourceRange
22
from coalib.settings.Setting import glob_list
23
from coalib.parsing.Globbing import fnmatch
24
25
26
# Result actions that a section may configure as auto-applied defaults via
# its ``default_actions`` setting (consumed by ``get_default_actions`` /
# ``autoapply_actions`` below).
ACTIONS = [ApplyPatchAction,
           PrintDebugMessageAction,
           ShowPatchAction]
29
30
31
def get_cpu_count():
    """
    Determine how many jobs to run in parallel.

    :return: The number of CPUs reported by ``multiprocessing``, or 2 when
             the platform cannot report a count.
    """
    try:
        return multiprocessing.cpu_count()
    except NotImplementedError:  # pragma: no cover
        # cpu_count is not implemented for some CPU architectures/OSes;
        # assume a sane minimum of two parallel jobs.
        return 2
37
38
39
def fill_queue(queue_fill, any_list):
    """
    Takes element from a list and populates a queue with those elements.

    :param queue_fill: The queue to be filled.
    :param any_list:   List containing the elements.
    """
    put = queue_fill.put
    for item in any_list:
        put(item)
48
49
50
def get_running_processes(processes):
    """Count how many of the given processes are still alive."""
    return sum(1 for process in processes if process.is_alive())
52
53
54
def create_process_group(command_array, **kwargs):
    """
    Spawn a subprocess in its own process group.

    On Windows the child is started with the ``CREATE_NEW_PROCESS_GROUP``
    creation flag; everywhere else it becomes a session leader via
    ``os.setsid``, so the whole group can be signalled at once.

    :param command_array: The command (list form) to execute.
    :param kwargs:        Extra keyword arguments for ``subprocess.Popen``.
    :return:              The ``subprocess.Popen`` handle of the child.
    """
    if platform.system() == "Windows":  # pragma: no cover
        return subprocess.Popen(
            command_array,
            creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
            **kwargs)

    return subprocess.Popen(command_array,
                            preexec_fn=os.setsid,
                            **kwargs)
65
66
67
def get_default_actions(section):
    """
    Parses the key ``default_actions`` in the given section.

    :param section:    The section where to parse from.
    :return:           A dict with the bearname as keys and their default
                       actions as values and another dict that contains bears
                       and invalid action names.
    """
    try:
        default_actions = dict(section["default_actions"])
    except IndexError:
        # Setting not present at all -> nothing configured.
        return {}, {}

    action_dict = {action.get_metadata().name: action for action in ACTIONS}
    # Configured action names that do not belong to any known action.
    invalid_action_set = set(default_actions.values()) - action_dict.keys()

    invalid_actions = {}
    if invalid_action_set:
        invalid_actions = {bear: action
                           for bear, action in default_actions.items()
                           if action in invalid_action_set}
        for bear in invalid_actions:
            del default_actions[bear]

    return ({bearname: action_dict[action_name]
             for bearname, action_name in default_actions.items()},
            invalid_actions)
95
96
97
def autoapply_actions(results,
                      file_dict,
                      file_diff_dict,
                      section,
                      log_printer):
    """
    Auto-applies actions like defined in the given section.

    :param results:        A list of results.
    :param file_dict:      A dictionary containing the name of files and its
                           contents.
    :param file_diff_dict: A dictionary that contains filenames as keys and
                           diff objects as values.
    :param section:        The section.
    :param log_printer:    A log printer instance to log messages on.
    :return:               A list of unprocessed results.
    """

    default_actions, invalid_actions = get_default_actions(section)

    # Warn once per configured action name that does not exist at all.
    for bearname, actionname in invalid_actions.items():
        log_printer.warn("Selected default action {!r} for bear {!r} does "
                         "not exist. Ignoring action.".format(actionname,
                                                              bearname))

    if len(default_actions) == 0:
        # There's nothing to auto-apply.
        return results

    not_processed_results = []
    for result in results:
        try:
            # Match full bear names deterministically, prioritized!
            action = default_actions[result.origin]
        except KeyError:
            # Fall back to glob matching against the configured bear names.
            for bear_glob in default_actions:
                if fnmatch(result.origin, bear_glob):
                    action = default_actions[bear_glob]
                    break
            else:
                # No default action configured for this bear.
                not_processed_results.append(result)
                continue

        if not action.is_applicable(result, file_dict, file_diff_dict):
            log_printer.warn("Selected default action {!r} for bear {!r} is "
                             "not applicable. Action not applied.".format(
                                 action.get_metadata().name, result.origin))
            not_processed_results.append(result)
            continue

        try:
            action().apply_from_section(result,
                                        file_dict,
                                        file_diff_dict,
                                        section)
            log_printer.info("Applied {!r} on {} from {!r}.".format(
                action.get_metadata().name,
                result.location_repr(),
                result.origin))
        except Exception as ex:
            # Deliberately broad: a failing action must not abort the whole
            # run; the result is handed back unprocessed instead.
            not_processed_results.append(result)
            log_printer.log_exception(
                "Failed to execute action {!r} with error: {}.".format(
                    action.get_metadata().name, ex),
                ex)
            log_printer.debug("-> for result " + repr(result) + ".")

    return not_processed_results
165
166
167
def check_result_ignore(result, ignore_ranges):
    """
    Determines if the result has to be ignored.

    :param result:        The result that needs to be checked.
    :param ignore_ranges: A list of tuples, each containing a list of lower
                          cased affected bearnames and a SourceRange to
                          ignore. If any of the bearname lists is empty, it
                          is considered an ignore range for all bears.
                          This may be a list of globbed bear wildcards.
    :return:              True if the result has to be ignored.
    """
    for bears, ignore_range in ignore_ranges:
        if not result.overlaps(ignore_range):
            continue
        origin = result.origin.lower()
        # An empty bear list means "ignore everything"; otherwise the
        # origin must appear literally or match one of the globs.
        if not bears or origin in bears or fnmatch(origin, bears):
            return True

    return False
186
187
188
def print_result(results,
                 file_dict,
                 retval,
                 print_results,
                 section,
                 log_printer,
                 file_diff_dict,
                 ignore_ranges):
    """
    Takes the results produced by each bear and gives them to the print_results
    method to present to the user.

    :param results:        A list of results.
    :param file_dict:      A dictionary containing the name of files and its
                           contents.
    :param retval:         It is True if no results were yielded ever before.
                           If it is False this function will return False no
                           matter what happens. Else it depends on if this
                           invocation yields results.
    :param print_results:  A function that prints all given results appropriate
                           to the output medium.
    :param section:        The section; provides the ``min_severity`` and
                           ``autoapply`` settings.
    :param log_printer:    The log printer to log messages on.
    :param file_diff_dict: A dictionary that contains filenames as keys and
                           diff objects as values.
    :param ignore_ranges:  A list of SourceRanges. Results that affect code in
                           any of those ranges will be ignored.
    :return:               A tuple: False if any results were yielded else
                           True, and the list of results remaining after
                           auto-applying actions.
    """
    min_severity_str = str(section.get('min_severity', 'INFO')).upper()
    # Fall back to the INFO severity *value*, not the string 'INFO': with an
    # unknown setting the old string default made ``severity >= min_severity``
    # below a TypeError (int vs str) comparison.
    min_severity = RESULT_SEVERITY.str_dict.get(min_severity_str,
                                                RESULT_SEVERITY.INFO)
    # Keep only genuine Result objects that are severe enough and not
    # covered by an ignore range.
    results = list(filter(lambda result:
                          type(result) is Result and
                          result.severity >= min_severity and
                          not check_result_ignore(result, ignore_ranges),
                          results))

    if bool(section.get('autoapply', 'true')):
        patched_results = autoapply_actions(results,
                                            file_dict,
                                            file_diff_dict,
                                            section,
                                            log_printer)
    else:
        patched_results = results

    print_results(log_printer,
                  section,
                  patched_results,
                  file_dict,
                  file_diff_dict)
    return retval or len(results) > 0, patched_results
239
240
241
def get_file_dict(filename_list, log_printer):
    """
    Reads all files into a dictionary.

    :param filename_list: List of names of paths to files to get contents of.
    :param log_printer:   The logger which logs errors.
    :return:              Reads the content of each file into a dictionary
                          with filenames as keys.
    """
    file_dict = {}
    for filename in filename_list:
        try:
            with open(filename, "r", encoding="utf-8") as handle:
                lines = handle.readlines()
        except UnicodeDecodeError:
            log_printer.warn("Failed to read file '{}'. It seems to contain "
                             "non-unicode characters. Leaving it "
                             "out.".format(filename))
            continue
        except OSError as exception:  # pragma: no cover
            log_printer.log_exception("Failed to read file '{}' because of "
                                      "an unknown error. Leaving it "
                                      "out.".format(filename),
                                      exception,
                                      log_level=LOG_LEVEL.WARNING)
            continue

        # Lines are stored immutably so bears cannot mutate file contents.
        file_dict[filename] = tuple(lines)

    log_printer.debug("Files that will be checked:\n" +
                      "\n".join(file_dict.keys()))
    return file_dict
269
270
271
def filter_raising_callables(it, exception, *args, **kwargs):
    """
    Filters all callable items inside the given iterator that raise the
    given exceptions.

    Yields the return value of each callable; callables that raise one of
    the given exceptions are silently skipped.

    :param it:        The iterator to filter.
    :param exception: The (tuple of) exception(s) to filter for.
    :param args:      Positional arguments to pass to the callable.
    :param kwargs:    Keyword arguments to pass to the callable.
    """
    for callable_ in it:
        try:
            result = callable_(*args, **kwargs)
        except exception:
            continue
        yield result
286
287
288
def instantiate_bears(section,
                      local_bear_list,
                      global_bear_list,
                      file_dict,
                      message_queue):
    """
    Instantiates each bear with the arguments it needs.

    Bear classes whose constructor raises ``RuntimeError`` are dropped.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bear classes to instantiate.
    :param global_bear_list: List of global bear classes to instantiate.
    :param file_dict:        Dictionary containing filenames and their
                             contents.
    :param message_queue:    Queue responsible to maintain the messages
                             delivered by the bears.
    :return:                 The local and global bear instance lists.
    """
    local_bear_list = list(filter_raising_callables(local_bear_list,
                                                    RuntimeError,
                                                    section,
                                                    message_queue,
                                                    timeout=0.1))

    global_bear_list = list(filter_raising_callables(global_bear_list,
                                                     RuntimeError,
                                                     file_dict,
                                                     section,
                                                     message_queue,
                                                     timeout=0.1))

    return local_bear_list, global_bear_list
323
324
325
def instantiate_processes(section,
                          local_bear_list,
                          global_bear_list,
                          job_count,
                          cache,
                          log_printer):
    """
    Instantiate the number of processes that will run bears which will be
    responsible for running bears in a multiprocessing environment.

    :param section:          The section the bears belong to.
    :param local_bear_list:  List of local bears belonging to the section.
                             Modified in place with the instantiated bears.
    :param global_bear_list: List of global bears belonging to the section.
                             Modified in place with the instantiated bears.
    :param job_count:        Max number of processes to create.
    :param cache:            An instance of ``misc.Caching.FileCache`` to use as
                             a file cache buffer.
    :param log_printer:      The log printer to warn to.
    :return:                 A tuple containing a list of processes,
                             and the arguments passed to each process which are
                             the same for each object.
    """
    filename_list = collect_files(
        glob_list(section.get('files', "")),
        log_printer,
        ignored_file_paths=glob_list(section.get('ignore', "")),
        limit_file_paths=glob_list(section.get('limit_files', "")))

    # This stores all matched files irrespective of whether coala is run
    # only on changed files or not. Global bears require all the files
    complete_filename_list = filename_list

    # Start tracking all the files
    if cache:
        cache.track_files(set(complete_filename_list))
        # NOTE(review): the trailing ``if cache else filename_list`` is
        # redundant — we are already inside ``if cache:`` here.
        changed_files = cache.get_uncached_files(
            set(filename_list)) if cache else filename_list

        # If caching is enabled then the local bears should process only the
        # changed files.
        log_printer.debug("coala is run only on changed files, bears' log "
                          "messages from previous runs may not appear. You may "
                          "use the `--flush-cache` flag to see them.")
        filename_list = changed_files

    # Note: the complete file dict is given as the file dict to bears and
    # the whole project is accessible to every bear. However, local bears are
    # run only for the changed files if caching is enabled.
    complete_file_dict = get_file_dict(complete_filename_list, log_printer)
    # Restrict the local-bear view to changed files that could also be read.
    file_dict = {filename: complete_file_dict[filename]
                 for filename in filename_list
                 if filename in complete_file_dict}

    manager = multiprocessing.Manager()
    global_bear_queue = multiprocessing.Queue()
    filename_queue = multiprocessing.Queue()
    local_result_dict = manager.dict()
    global_result_dict = manager.dict()
    message_queue = multiprocessing.Queue()
    control_queue = multiprocessing.Queue()

    # Shared keyword arguments for every worker process (see BearRunning.run).
    bear_runner_args = {"file_name_queue": filename_queue,
                        "local_bear_list": local_bear_list,
                        "global_bear_list": global_bear_list,
                        "global_bear_queue": global_bear_queue,
                        "file_dict": file_dict,
                        "local_result_dict": local_result_dict,
                        "global_result_dict": global_result_dict,
                        "message_queue": message_queue,
                        "control_queue": control_queue,
                        "timeout": 0.1}

    # Slice-assign so the caller's list objects (already referenced from
    # bear_runner_args above) are updated in place.
    local_bear_list[:], global_bear_list[:] = instantiate_bears(
        section,
        local_bear_list,
        global_bear_list,
        complete_file_dict,
        message_queue)

    fill_queue(filename_queue, file_dict.keys())
    fill_queue(global_bear_queue, range(len(global_bear_list)))

    return ([multiprocessing.Process(target=run, kwargs=bear_runner_args)
             for i in range(job_count)],
            bear_runner_args)
409
410
411
def get_ignore_scope(line, keyword):
    """
    Retrieves the bears that are to be ignored defined in the given line.

    :param line:    The line containing the ignore declaration.
    :param keyword: The keyword that was found. Everything after the rightmost
                    occurrence of it will be considered for the scope.
    :return:        A list of lower cased bearnames or an empty list (-> "all")
    """
    scope = line[line.rfind(keyword) + len(keyword):]
    return ([] if scope.startswith("all")
            else list(StringConverter(scope, list_delimiters=', ')))
425
426
427
def yield_ignore_ranges(file_dict):
    """
    Yields tuples of affected bears and a SourceRange that shall be ignored for
    those.

    :param file_dict: The file dictionary.
    """
    # Decomposed per file to keep the per-line scanning logic small
    # (the combined generator was flagged for high cyclomatic complexity).
    for filename, file in file_dict.items():
        yield from _yield_ignore_ranges_for_file(filename, file)


def _yield_ignore_ranges_for_file(filename, file):
    """
    Yields the (bears, SourceRange) ignore tuples declared in one file.

    :param filename: The name of the file the lines belong to.
    :param file:     The sequence of the file's lines.
    """
    start = None
    bears = []
    stop_ignoring = False
    for line_number, line in enumerate(file, start=1):
        # Before lowering all lines ever read, first look for the biggest
        # common substring, case sensitive: I*gnor*e, start i*gnor*ing.
        if 'gnor' in line:
            line = line.lower()
            if "start ignoring " in line:
                start = line_number
                bears = get_ignore_scope(line, "start ignoring ")
            elif "stop ignoring" in line:
                stop_ignoring = True
                if start:
                    yield (bears,
                           SourceRange.from_values(
                               filename,
                               start,
                               1,
                               line_number,
                               len(file[line_number-1])))
            elif "ignore " in line:
                # An inline ignore covers this line plus the following one
                # (clamped at the end of the file).
                end_line = min(line_number + 1, len(file))
                yield (get_ignore_scope(line, "ignore "),
                       SourceRange.from_values(
                           filename,
                           line_number, 1,
                           end_line, len(file[end_line - 1])))

    # An unterminated "start ignoring" block extends to the end of the file.
    if stop_ignoring is False and start is not None:
        yield (bears,
               SourceRange.from_values(filename,
                                       start,
                                       1,
                                       len(file),
                                       len(file[-1])))
471
472
473
def get_file_list(results):
    """
    Get the set of files that are affected in the given results.

    :param results: A list of results from which the list of files is to be
                    extracted.
    :return:        A set of file paths containing the mentioned list of
                    files.
    """
    files = set()
    for result in results:
        files.update(code.file for code in result.affected_code)
    return files
483
484
485
def process_queues(processes,
                   control_queue,
                   local_result_dict,
                   global_result_dict,
                   file_dict,
                   print_results,
                   section,
                   cache,
                   log_printer):
    """
    Iterate the control queue and send the results received to the print_result
    method so that they can be presented to the user.

    :param processes:          List of processes which can be used to run
                               Bears.
    :param control_queue:      Containing control elements that indicate
                               whether there is a result available and which
                               bear it belongs to.
    :param local_result_dict:  Dictionary containing results respective to
                               local bears. It is modified by the processes
                               i.e. results are added to it by multiple
                               processes.
    :param global_result_dict: Dictionary containing results respective to
                               global bears. It is modified by the processes
                               i.e. results are added to it by multiple
                               processes.
    :param file_dict:          Dictionary containing file contents with
                               filename as keys.
    :param print_results:      Prints all given results appropriate to the
                               output medium.
    :param section:            The section being executed; forwarded to
                               print_result for its filter/auto-apply settings.
    :param cache:              An instance of ``misc.Caching.FileCache`` to use
                               as a file cache buffer.
    :param log_printer:        The log printer to log messages on.
    :return:                   Return True if all bears execute successfully and
                               Results were delivered to the user. Else False.
    """
    file_diff_dict = {}
    retval = False
    # Number of processes working on local/global bears. They are count down
    # when the last queue element of that process is processed which may be
    # *after* the process has ended!
    local_processes = len(processes)
    global_processes = len(processes)
    # Global results that arrive while local bears are still running are
    # buffered and only printed after all local results.
    global_result_buffer = []
    result_files = set()
    ignore_ranges = list(yield_ignore_ranges(file_dict))

    # One process is the logger thread
    while local_processes > 1:
        try:
            control_elem, index = control_queue.get(timeout=0.1)

            if control_elem == CONTROL_ELEMENT.LOCAL_FINISHED:
                local_processes -= 1
            elif control_elem == CONTROL_ELEMENT.GLOBAL_FINISHED:
                global_processes -= 1
            elif control_elem == CONTROL_ELEMENT.LOCAL:
                assert local_processes != 0
                result_files.update(get_file_list(local_result_dict[index]))
                retval, res = print_result(local_result_dict[index],
                                           file_dict,
                                           retval,
                                           print_results,
                                           section,
                                           log_printer,
                                           file_diff_dict,
                                           ignore_ranges)
                # Store back the filtered/patched result list.
                local_result_dict[index] = res
            else:
                assert control_elem == CONTROL_ELEMENT.GLOBAL
                global_result_buffer.append(index)
        except queue.Empty:
            if get_running_processes(processes) < 2:  # pragma: no cover
                # Recover silently, those branches are only
                # nondeterministically covered.
                break

    # Flush global result buffer
    for elem in global_result_buffer:
        result_files.update(get_file_list(global_result_dict[elem]))
        retval, res = print_result(global_result_dict[elem],
                                   file_dict,
                                   retval,
                                   print_results,
                                   section,
                                   log_printer,
                                   file_diff_dict,
                                   ignore_ranges)
        global_result_dict[elem] = res

    # One process is the logger thread
    while global_processes > 1:
        try:
            control_elem, index = control_queue.get(timeout=0.1)

            if control_elem == CONTROL_ELEMENT.GLOBAL:
                result_files.update(get_file_list(global_result_dict[index]))
                retval, res = print_result(global_result_dict[index],
                                           file_dict,
                                           retval,
                                           print_results,
                                           section,
                                           log_printer,
                                           file_diff_dict,
                                           ignore_ranges)
                global_result_dict[index] = res
            else:
                assert control_elem == CONTROL_ELEMENT.GLOBAL_FINISHED
                global_processes -= 1
        except queue.Empty:
            if get_running_processes(processes) < 2:  # pragma: no cover
                # Recover silently, those branches are only
                # nondeterministically covered.
                break

    if cache:
        # Files that produced results need re-analysis on the next run.
        cache.untrack_files(result_files)
    return retval
602
603
604
def simplify_section_result(section_result):
    """
    Takes in a section's result from ``execute_section`` and simplifies it
    for easy usage in other functions.

    :param section_result: The result of a section which was executed.
    :return:               Tuple containing:
                            - bool - True if results were yielded
                            - bool - True if unfixed results were yielded
                            - list - Results from all bears (local and global)
    """
    section_yielded_result = section_result[0]
    # Flatten local (index 1) and global (index 2) result mappings,
    # skipping entries that hold no result list.
    results_for_section = [result
                           for value in chain(section_result[1].values(),
                                              section_result[2].values())
                           if value is not None
                           for result in value]

    return (section_yielded_result,
            len(results_for_section) > 0,
            results_for_section)
629
630
631
def execute_section(section,
                    global_bear_list,
                    local_bear_list,
                    print_results,
                    cache,
                    log_printer):
    """
    Executes the section with the given bears.

    The execute_section method does the following things:

    1. Prepare a Process
       -  Load files
       -  Create queues
    2. Spawn up one or more Processes
    3. Output results from the Processes
    4. Join all processes

    :param section:          The section to execute.
    :param global_bear_list: List of global bears belonging to the section.
                             Dependencies are already resolved.
    :param local_bear_list:  List of local bears belonging to the section.
                             Dependencies are already resolved.
    :param print_results:    Prints all given results appropriate to the
                             output medium.
    :param cache:            An instance of ``misc.Caching.FileCache`` to use as
                             a file cache buffer.
    :param log_printer:      The log_printer to warn to.
    :return:                 Tuple containing a bool (True if results were
                             yielded, False otherwise), a Manager.dict
                             containing all local results(filenames are key)
                             and a Manager.dict containing all global bear
                             results (bear names are key) as well as the
                             file dictionary.
    """
    try:
        running_processes = int(section['jobs'])
    except ValueError:
        # 'jobs' was set but is not a number.
        log_printer.warn("Unable to convert setting 'jobs' into a number. "
                         "Falling back to CPU count.")
        running_processes = get_cpu_count()
    except IndexError:
        # 'jobs' was not set at all.
        running_processes = get_cpu_count()

    processes, arg_dict = instantiate_processes(section,
                                                local_bear_list,
                                                global_bear_list,
                                                running_processes,
                                                cache,
                                                log_printer)

    logger_thread = LogPrinterThread(arg_dict["message_queue"],
                                     log_printer)
    # Start and join the logger thread along with the processes to run bears
    processes.append(logger_thread)

    for runner in processes:
        runner.start()

    try:
        return (process_queues(processes,
                               arg_dict["control_queue"],
                               arg_dict["local_result_dict"],
                               arg_dict["global_result_dict"],
                               arg_dict["file_dict"],
                               print_results,
                               section,
                               cache,
                               log_printer),
                arg_dict["local_result_dict"],
                arg_dict["global_result_dict"],
                arg_dict["file_dict"])
    finally:
        # Always stop the logger thread and wait for every worker,
        # even when process_queues raised.
        logger_thread.running = False

        for runner in processes:
            runner.join()
708