Passed
Push — master ( 9e6bed...ccd378 )
by Ken M.
01:08
created

keywords_finder   A

Complexity

Total Complexity 14

Size/Duplication

Total Lines 92
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 71
dl 0
loc 92
rs 10
c 0
b 0
f 0
wmc 14

4 Functions

Rating   Name   Duplication   Size   Complexity  
A positions_overlapped() 0 6 1
A get_matched_positions() 0 12 5
B checkio() 0 23 7
A merge_positions() 0 2 1
1
from itertools import product
2
3
4
def get_matched_positions(text, keywords):
    """Return the (start, end) span of every occurrence of each keyword.

    Matching is an exact, case-sensitive substring search; callers that want
    case-insensitive matching must normalise both arguments beforehand.

    Args:
        text: The string to search.
        keywords: An iterable of substrings to look for. Empty strings are
            skipped.

    Returns:
        A list of ``(start, end)`` tuples (``end`` exclusive), grouped by
        keyword in the order given and ascending by ``start`` within each
        keyword. Occurrences of one keyword that overlap each other are all
        reported, e.g. ``"aa"`` in ``"aaa"`` yields ``(0, 2)`` and ``(1, 3)``.
    """
    results = []
    for keyword in keywords:
        if not keyword:
            # An empty needle would "match" at every index; ignore it.
            continue
        start = text.find(keyword)
        while start != -1:
            results.append((start, start + len(keyword)))
            # Advance by one character, not by len(keyword), so that
            # self-overlapping occurrences are not skipped.
            start = text.find(keyword, start + 1)
    return results
16
17
18
def positions_overlapped(position1, position2, text):
    """Tell whether two (start, end) spans should be merged into one.

    Two spans qualify when the earlier one ends after the later one starts,
    or when one ends exactly where the other begins and both cover identical
    slices of ``text`` (compared exactly as they appear in ``text``).

    Args:
        position1: A ``(start, end)`` pair, ``end`` exclusive.
        position2: A ``(start, end)`` pair, ``end`` exclusive.
        text: The string the spans index into.

    Returns:
        True if the spans overlap or are touching repeats of the same
        substring, otherwise False. The answer does not depend on the order
        in which the two spans are passed.
    """
    # Order the spans by (start, end) so the checks below are valid whichever
    # way round the caller supplied them.
    first, second = sorted(
        (position1, position2), key=lambda span: (span[0], span[1])
    )
    if first[1] > second[0]:
        return True
    return (
        first[1] == second[0]
        and text[first[0]:first[1]] == text[second[0]:second[1]]
    )
26
27
28
def merge_positions(position1, position2):
    """Return the smallest (start, end) span that covers both given spans.

    Args:
        position1: A ``(start, end)`` pair.
        position2: A ``(start, end)`` pair.

    Returns:
        A tuple of the smaller of the two starts and the larger of the two
        ends.
    """
    start = min(position1[0], position2[0])
    end = max(position1[1], position2[1])
    return (start, end)
30
31
32
def _first_mergeable_pair(positions, text):
    """Return the first two spans in ``positions`` that should merge, or None."""
    for index, first in enumerate(positions):
        for second in positions[index + 1:]:
            if positions_overlapped(first, second, text):
                return first, second
    return None


def checkio(text, words):
    """Wrap every occurrence of the given keywords in ``<span>`` tags.

    Keywords are matched case-insensitively, while the returned string keeps
    the original casing of ``text``. Matches that ``positions_overlapped``
    reports as belonging together are merged into a single span.

    Args:
        text: The string to annotate.
        words: Whitespace-separated keywords; repeated separators are
            ignored.

    Returns:
        ``text`` with each resulting span wrapped as ``<span>...</span>``.
        If nothing matches, ``text`` is returned unchanged.
    """
    # NOTE: spans are computed on the lower-cased text and then applied to the
    # original; this assumes lower-casing does not change the string length.
    lowered = text.lower()
    positions = sorted(get_matched_positions(lowered, words.lower().split()))

    # Merge until no pair qualifies. Each merge can make new pairs mergeable,
    # so the scan restarts from the beginning after every change.
    pair = _first_mergeable_pair(positions, lowered)
    while pair is not None:
        first, second = pair
        positions.remove(first)
        positions.remove(second)
        positions.append(merge_positions(first, second))
        positions.sort()
        # The lower-cased text is passed so the "same substring" adjacency
        # check is case-insensitive, consistent with the matching itself.
        pair = _first_mergeable_pair(positions, lowered)

    # At this point the spans are sorted and disjoint, so the result can be
    # assembled in one left-to-right pass.
    pieces = []
    cursor = 0
    for start, end in positions:
        pieces.append(text[cursor:start])
        pieces.append('<span>' + text[start:end] + '</span>')
        cursor = end
    pieces.append(text[cursor:])
    return ''.join(pieces)
55
56
57
# These asserts are only for self-checking and are not necessary for
# auto-testing.
if __name__ == '__main__':  # pragma: no cover
    assert (
        checkio("This is only a text example for task example.", "example")
        == "This is only a text <span>example</span> for task <span>example</span>."
    ), "Simple test"

    assert (
        checkio("Python is a widely used high-level programming language.", "pyThoN")
        == "<span>Python</span> is a widely used high-level programming language."
    ), "Ignore letters cases, but keep original"

    assert (
        checkio(
            "It is experiment for control groups with similar distributions.", "is im"
        )
        == "It <span>is</span> exper<span>im</span>ent for control groups with s<span>im</span>ilar d<span>is</span>tributions."
    ), "Several subwords"

    assert (
        checkio(
            "The National Aeronautics and Space Administration (NASA).", "nasa  THE"
        )
        == "<span>The</span> National Aeronautics and Space Administration (<span>NASA</span>)."
    ), "two spaces"

    assert (
        checkio("Did you find anything?", "word space tree") == "Did you find anything?"
    ), "No comments"

    assert (
        checkio("Hello World! Or LOL", "hell world or lo")
        == "<span>Hello</span> <span>World</span>! <span>Or</span> <span>LO</span>L"
    ), "Contain or intersect"