Completed
Pull Request — master (#1116)
by Lasse
01:41
created

bears.c_languages.codeclone_detection.is_assignee()   B

Complexity

Conditions 7

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 7
dl 0
loc 19
rs 7.3333
1
"""
2
This file contains counting conditions for count matrix based code clone
detection. (See http://goo.gl/8UuAW5 for general information about the
algorithm.)
5
"""
6
7
8
from coalib.bearlib.parsing.clang.cindex import CursorKind
9
from coalib.misc.Enum import enum
10
11
12
def is_function_declaration(cursor):
    """
    Tells whether a clang cursor points at a function declaration.

    :param cursor: A clang cursor from the AST.
    :return:       True if the cursor kind is FUNCTION_DECL, False otherwise.
    """
    cursor_kind = cursor.kind
    return cursor_kind == CursorKind.FUNCTION_DECL
20
21
22
def get_identifier_name(cursor):
    """
    Reads the identifier name out of a clang cursor.

    :param cursor: A clang cursor from the AST.
    :return:       The decoded display name of the cursor as string.
    """
    # The display name has to be decoded before it can be used as a string.
    raw_name = cursor.displayname
    return raw_name.decode()
30
31
32
def is_literal(cursor):
    """
    Tells whether a clang cursor points at a literal.

    :param cursor: A clang cursor from the AST.
    :return:       True if the cursor is a literal of any kind.
    """
    literal_kinds = (
        CursorKind.INTEGER_LITERAL,
        CursorKind.FLOATING_LITERAL,
        CursorKind.IMAGINARY_LITERAL,
        CursorKind.STRING_LITERAL,
        CursorKind.CHARACTER_LITERAL,
        CursorKind.OBJC_STRING_LITERAL,
        CursorKind.CXX_BOOL_LITERAL_EXPR,
        CursorKind.CXX_NULL_PTR_LITERAL_EXPR,
    )
    return cursor.kind in literal_kinds
45
46
47
def is_reference(cursor):
    """
    Tells whether the cursor refers to something, i.e. whether it is an
    identifier of a function or variable.

    :param cursor: A clang cursor from the AST.
    :return:       True if the cursor is a variable declaration, a parameter
                   declaration or a declaration reference expression.
    """
    reference_kinds = (
        CursorKind.VAR_DECL,
        CursorKind.PARM_DECL,
        CursorKind.DECL_REF_EXPR,
    )
    return cursor.kind in reference_kinds
58
59
60
def _stack_contains_kind(stack, kind):
    """
    Searches the stack for a cursor of the given kind.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :param kind:  The cursor kind to look for.
    :return:      True if any cursor in the stack has that kind.
    """
    # The child number is irrelevant here, only the cursor kinds are compared.
    return any(cursor.kind == kind for cursor, _ in stack)
74
75
76
def _is_nth_child_of_kind(stack, allowed_nums, kind):
    """
    Counts the stack entries that directly follow a cursor of the given kind
    and whose child number is one of the allowed numbers.

    :param stack:        The stack holding tuples of a parent cursor and the
                         child number.
    :param allowed_nums: List/iterator of child numbers allowed.
    :param kind:         The kind of the parent element.
    :return:             Number of matches.
    """
    matches = 0
    # Remembers whether the previously visited stack entry had the wanted
    # kind, i.e. whether the current entry is a child of such a cursor.
    parent_has_kind = False
    for cursor, child_number in stack:
        if parent_has_kind and child_number in allowed_nums:
            matches += 1

        parent_has_kind = cursor.kind == kind

    return matches
100
101
102
def is_function(stack):
    """
    Tells whether the cursor on top of the stack is used as a function (in
    contrast to being used as a variable).

    :param stack: A stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if this is used as a function, False otherwise.
    """
    # The callee is the first child (number 0) of a call expression.
    callee_positions = [0]
    matches = _is_nth_child_of_kind(stack,
                                    callee_positions,
                                    CursorKind.CALL_EXPR)
    return matches != 0
112
113
114
# Semantic positions a cursor can take relative to a for loop. The order of
# the names matters: _get_position_in_for_tokens advances from one position
# to the following one with `state + 1`.
FOR_POSITION = enum("UNKNOWN", "INIT", "COND", "INC", "BODY")
115
116
117
def _get_position_in_for_tokens(tokens, position):
    """
    Retrieves the semantic position of the given position in a for loop. It
    operates under the assumptions that the given tokens represent a for loop
    and that the given position is within the tokens.

    The tokens are scanned once: every ";" moves on to the next semantic
    position and the ")" that closes the loop header switches to the body.

    :param tokens:   The tokens representing the for loop (clang extent)
    :param position: A tuple holding (line, column) of the position to
                     identify.
    :return:         A FOR_POSITION object indicating where the position is
                     semantically.
    """
    state = FOR_POSITION.INIT
    next_state = state
    opened_brackets = 0
    for token in tokens:
        # Decode once per token instead of once per comparison.
        spelling = token.spelling.decode()
        if spelling == ";":
            next_state = state + 1
        elif spelling == "(":
            opened_brackets += 1
        elif spelling == ")":
            opened_brackets -= 1
            # Closed bracket for for condition, otherwise syntax error by clang
            if opened_brackets == 0:
                next_state = FOR_POSITION.BODY

        # Compare by value: the states are plain numbers and an identity
        # check would only work by accident of integer caching.
        if next_state != state:
            token_position = (token.extent.start.line,
                              token.extent.start.column)
            if position <= token_position:
                return state
            # Last state, if we reach it the position must be in body
            elif next_state == FOR_POSITION.BODY:
                return next_state

            state = next_state

    # We probably have a macro here, clang doesn't preprocess them. I don't see
    # a chance of getting macros parsed right here in the limited time
    # available. For our heuristic approach we'll just not count for loops
    # realized through macros. FIXME: This is not covered in the tests because
    # it contains a known bug that needs to be fixed, that is: macros destroy
    # everything.
    return FOR_POSITION.UNKNOWN  # pragma: no cover
161
162
163
def _get_positions_in_for_loop(stack):
    """
    Investigates all FOR_STMT objects in the stack and checks for each in
    what position the cursor on top of the stack is.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number. The cursor of the last entry is the one whose
                  position gets determined.
    :return:      A list of semantic FOR_POSITION's within for loops, one
                  entry per FOR_STMT in the stack.
    """
    for_loops = [elem for elem, _ in stack
                 if elem.kind == CursorKind.FOR_STMT]
    # Without any for loop there is nothing to locate and the top of the stack
    # does not need to be touched at all.
    if not for_loops:
        return []

    # The location of the investigated cursor is the same for every loop, so
    # it is determined only once.
    cursor = stack[-1][0]
    position = (cursor.location.line, cursor.location.column)

    return [_get_position_in_for_tokens(for_loop.get_tokens(), position)
            for for_loop in for_loops]
180
181
182
def _get_binop_operator(cursor):
    """
    Returns the operator token of a binary operator cursor.

    The operator is the first token that starts after the beginning of the
    first child and ends no later than the beginning of the second child.

    :param cursor: A cursor of kind BINARY_OPERATOR.
    :return:       The token object containing the actual operator or None
                   if no such token exists or if the cursor has less than two
                   children.
    """
    children = list(cursor.get_children())
    # Without both operands the operator cannot be located. Return None as
    # documented instead of failing with an IndexError.
    if len(children) < 2:
        return None

    left_operand, right_operand = children[0], children[1]
    operator_min_begin = (left_operand.location.line,
                          left_operand.location.column)
    operator_max_end = (right_operand.location.line,
                        right_operand.location.column)

    for token in cursor.get_tokens():
        token_begin = (token.extent.start.line, token.extent.start.column)
        token_end = (token.extent.end.line, token.extent.end.column)
        if operator_min_begin < token_begin and token_end <= operator_max_end:
            return token

    return None  # pragma: no cover
203
204
205
def _stack_contains_operators(stack, operators):
    """
    Searches the stack for a binary or compound assignment operator whose
    operator token is one of the given operators.

    :param stack:     The stack holding tuples of a parent cursor and the
                      child number.
    :param operators: A list of strings. E.g. ["+", "-"]
    :return:          True if the operator was found.
    """
    operator_kinds = (CursorKind.BINARY_OPERATOR,
                      CursorKind.COMPOUND_ASSIGNMENT_OPERATOR)
    for cursor, _ in stack:
        if cursor.kind not in operator_kinds:
            continue

        token = _get_binop_operator(cursor)
        # Not known how to reproduce but may be possible when evil macros
        # join the game.
        if token is None:  # pragma: no cover
            continue

        if token.spelling.decode() in operators:
            return True

    return False
227
228
229
# Operators that combine two operands into a new value.
ARITH_BINARY_OPERATORS = ["+", "-", "*", "/", "%", "&", "|"]
# Comparison operators as well as the logical connectives.
COMPARISION_OPERATORS = ["==", "<=", ">=", "<", ">", "!=", "&&", "||"]
# Compound assignments, derived from the arithmetic operators (e.g. "+=").
ADV_ASSIGNMENT_OPERATORS = ["{}=".format(operator)
                            for operator in ARITH_BINARY_OPERATORS]
# Every operator that assigns to its left hand side.
ASSIGNMENT_OPERATORS = ["="]
ASSIGNMENT_OPERATORS.extend(ADV_ASSIGNMENT_OPERATORS)
233
234
235
def in_sum(stack):
    """
    A counting condition that holds if the variable is used in a sum
    statement, i.e. within the operators +, - and their associated compound
    operators.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if one of those operators is within the stack.
    """
    sum_operators = ['+', '-', '+=', '-=']
    return _stack_contains_operators(stack, sum_operators)
242
243
244
def in_product(stack):
    """
    A counting condition that holds if the variable is used in a product
    statement, i.e. within the operators *, /, % and their associated compound
    operators.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if one of those operators is within the stack.
    """
    product_operators = ['*', '/', '%', '*=', '/=', '%=']
    return _stack_contains_operators(stack, product_operators)
251
252
253
def in_binary_operation(stack):
    """
    A counting condition that holds if the variable is used in a binary
    operation, i.e. within the operators |, & and their associated compound
    operators.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if one of those operators is within the stack.
    """
    bitwise_operators = ['&', '|', '&=', '|=']
    return _stack_contains_operators(stack, bitwise_operators)
260
261
262
def member_accessed(stack):
    """
    A counting condition that holds if a member of the cursor is accessed or
    the cursor is the accessed member.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if a member reference expression is within the stack.
    """
    wanted_kind = CursorKind.MEMBER_REF_EXPR
    return _stack_contains_kind(stack, wanted_kind)
268
269
270
def used(stack):
    """
    A counting condition that holds for every occurrence of a variable.

    :param stack: The stack of parent cursors. It is ignored on purpose, the
                  parameter only exists so this function shares the signature
                  of the other counting conditions.
    :return:      Always True.
    """
    # pylint: disable=unused-argument
    return True
276
277
278
def returned(stack):
    """
    A counting condition that holds if the cursor on top is used in a return
    statement.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if a return statement is within the stack.
    """
    wanted_kind = CursorKind.RETURN_STMT
    return _stack_contains_kind(stack, wanted_kind)
283
284
285
def is_inc_or_dec(stack):
    """
    A counting condition that holds if the cursor on top is inc- or
    decremented.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if a unary operator within the stack contains a "++"
                  or "--" token.
    """
    for cursor, _ in stack:
        if (cursor.kind == CursorKind.UNARY_OPERATOR and
                any(token.spelling.decode() in ["--", "++"]
                    for token in cursor.get_tokens())):
            return True

    return False
296
297
298
def is_condition(stack):
    """
    A counting condition that holds if the cursor on top is used as a
    condition of a while loop, an if statement or a for loop.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if the cursor is part of a condition.
    """
    # The condition is the first child of a while statement ...
    if _is_nth_child_of_kind(stack, [0], CursorKind.WHILE_STMT) != 0:
        return True

    # ... and of an if statement.
    if _is_nth_child_of_kind(stack, [0], CursorKind.IF_STMT) != 0:
        return True

    # For loops need a token based inspection.
    return FOR_POSITION.COND in _get_positions_in_for_loop(stack)
305
306
307
def in_condition(stack):
    """
    A counting condition that holds if the cursor on top is in the body of
    exactly one condition.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if exactly one if branch encloses the cursor.
    """
    # In every case the first child of IF_STMT is the condition itself
    # (non-NULL) so the second and third child are in the then/else branch
    branch_children = [1, 2]
    nesting_depth = _is_nth_child_of_kind(stack,
                                          branch_children,
                                          CursorKind.IF_STMT)
    return nesting_depth == 1
314
315
316
def in_second_level_condition(stack):
    """
    A counting condition that holds if the cursor on top is in the body of
    two nested conditions.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if exactly two if branches enclose the cursor.
    """
    # Children number 1 and 2 of an if statement are checked.
    branch_children = [1, 2]
    nesting_depth = _is_nth_child_of_kind(stack,
                                          branch_children,
                                          CursorKind.IF_STMT)
    return nesting_depth == 2
321
322
323
def in_third_level_condition(stack):
    """
    A counting condition that holds if the cursor on top is in the body of
    three or more nested conditions.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if more than two if branches enclose the cursor.
    """
    # Children number 1 and 2 of an if statement are checked.
    branch_children = [1, 2]
    nesting_depth = _is_nth_child_of_kind(stack,
                                          branch_children,
                                          CursorKind.IF_STMT)
    return nesting_depth > 2
329
330
331
def is_assignee(stack):
    """
    A counting condition that holds if the cursor on top is assigned
    something.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if the cursor ends before an assignment operator of an
                  enclosing operator cursor or if it is inc- or decremented.
    """
    top_cursor = stack[-1][0]
    cursor_end = (top_cursor.extent.end.line, top_cursor.extent.end.column)
    operator_kinds = (CursorKind.BINARY_OPERATOR,
                      CursorKind.COMPOUND_ASSIGNMENT_OPERATOR)

    for parent, _ in stack:
        if parent.kind not in operator_kinds:
            continue

        for token in parent.get_tokens():
            # This needs to be an assignment ...
            if token.spelling.decode() not in ASSIGNMENT_OPERATORS:
                continue

            # ... and the cursor has to be on the LHS
            token_start = (token.extent.start.line,
                           token.extent.start.column)
            if cursor_end <= token_start:
                return True

    return is_inc_or_dec(stack)
350
351
352
def is_assigner(stack):
    """
    A counting condition that holds if the cursor on top is used for an
    assignment on the RHS.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if the cursor is read by an assignment of an enclosing
                  operator cursor or if it is inc- or decremented.
    """
    top_cursor = stack[-1][0]
    cursor_start = (top_cursor.extent.start.line,
                    top_cursor.extent.start.column)
    operator_kinds = (CursorKind.BINARY_OPERATOR,
                      CursorKind.COMPOUND_ASSIGNMENT_OPERATOR)

    for parent, _ in stack:
        if parent.kind not in operator_kinds:
            continue

        for token in parent.get_tokens():
            spelling = token.spelling.decode()
            # This needs to be an assignment
            if spelling not in ASSIGNMENT_OPERATORS:
                continue

            # If we have something like += its irrelevant on which side the
            # cursor is because += reads on both sides
            if spelling != "=":
                return True

            # For a plain assignment the cursor has to be on the RHS
            token_end = (token.extent.end.line, token.extent.end.column)
            if token_end <= cursor_start:
                return True

    return is_inc_or_dec(stack)
373
374
375
def _loop_level(stack):
    """
    Determines the loop level of the cursor on top of the stack.

    :param stack: A stack of clang cursors.
    :return:      An integer representing the level of nested loops.
    """
    for_positions = _get_positions_in_for_loop(stack)
    # Both the increment part and the body of a for loop count as content.
    for_levels = (for_positions.count(FOR_POSITION.INC) +
                  for_positions.count(FOR_POSITION.BODY))
    # Child number 1 of a while statement is counted as well.
    while_levels = _is_nth_child_of_kind(stack, [1], CursorKind.WHILE_STMT)
    return for_levels + while_levels
386
387
388
def loop_content(stack):
    """
    A counting condition that holds if the cursor on top is within a first
    level loop.

    :param stack: A stack of clang cursors.
    :return:      True if the loop level is exactly one.
    """
    nesting_level = _loop_level(stack)
    return nesting_level == 1
393
394
395
def second_level_loop_content(stack):
    """
    A counting condition that holds if the cursor on top is within a second
    level loop.

    :param stack: A stack of clang cursors.
    :return:      True if the loop level is exactly two.
    """
    nesting_level = _loop_level(stack)
    return nesting_level == 2
400
401
402
def third_level_loop_content(stack):
    """
    A counting condition that holds if the cursor on top is within a third
    (or higher) level loop.

    :param stack: A stack of clang cursors.
    :return:      True if the loop level is greater than two.
    """
    nesting_level = _loop_level(stack)
    return nesting_level > 2
407
408
409
def is_param(stack):
    """
    A counting condition that holds if the cursor on top is a parameter
    declaration.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if the kind of the topmost cursor is PARM_DECL.
    """
    top_cursor, _ = stack[-1][0], stack[-1]
    return top_cursor.kind == CursorKind.PARM_DECL
414
415
416
def is_called(stack):
    """
    A counting condition that holds if the cursor is a function that is
    called. (Function pointers are counted too.)

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if a call expression is within the stack and the
                  cursor is used as a function.
    """
    if not _stack_contains_kind(stack, CursorKind.CALL_EXPR):
        return False

    return is_function(stack)
423
424
425
def is_call_param(stack):
    """
    A counting condition that holds if the cursor is a parameter to another
    function.

    :param stack: The stack holding tuples of a parent cursor and the child
                  number.
    :return:      True if a call expression is within the stack and the
                  cursor is not used as a function.
    """
    if not _stack_contains_kind(stack, CursorKind.CALL_EXPR):
        return False

    return not is_function(stack)
431
432
433
# Maps the names usable in a setting to the counting condition functions.
condition_dict = {
    "used": used,
    "returned": returned,
    "is_condition": is_condition,
    "in_condition": in_condition,
    "in_second_level_condition": in_second_level_condition,
    "in_third_level_condition": in_third_level_condition,
    "is_assignee": is_assignee,
    "is_assigner": is_assigner,
    "loop_content": loop_content,
    "second_level_loop_content": second_level_loop_content,
    "third_level_loop_content": third_level_loop_content,
    "is_param": is_param,
    "is_called": is_called,
    "is_call_param": is_call_param,
    "in_sum": in_sum,
    "in_product": in_product,
    "in_binary_operation": in_binary_operation,
    "member_accessed": member_accessed,
}
451
452
453
def counting_condition(value):
    """
    A custom converter that turns a setting from coala into counting
    condition function objects for this bear only.

    The names are looked up case insensitively. A name that is not known
    yields None at its place in the result.

    :param value: An object that can be converted to a list.
    :return:      A list of functions (counting conditions)
    """
    condition_names = list(value)
    return [condition_dict.get(name.lower()) for name in condition_names]
467