|
1
|
|
|
from itertools import product |
|
2
|
|
|
|
|
3
|
|
|
|
|
4
|
|
|
def get_matched_positions(text, keywords):
    """Return the (start, end) span of every occurrence of each keyword.

    The search is an exact, case-sensitive substring search; callers that
    want case-insensitive matching must normalise both arguments first.

    Args:
        text: The string to search in.
        keywords: An iterable of substrings to look for. Empty keywords are
            ignored.

    Returns:
        A list of ``(start, end)`` tuples (``end`` is exclusive), grouped by
        keyword in the order given and ascending by ``start`` within each
        keyword. Overlapping occurrences of the same keyword are all
        reported, e.g. ``"aa"`` in ``"aaa"`` yields ``(0, 2)`` and ``(1, 3)``.
    """
    results = []
    for keyword in keywords:
        if not keyword:
            # An empty string "matches" at every index; it carries no
            # information, so skip it.
            continue
        start = text.find(keyword)
        while start != -1:
            results.append((start, start + len(keyword)))
            # Step forward by a single character (not by len(keyword)) so
            # that occurrences overlapping the current one are not missed.
            start = text.find(keyword, start + 1)
    return results
|
16
|
|
|
|
|
17
|
|
|
|
|
18
|
|
|
def positions_overlapped(position1, position2, text):
    """Tell whether two spans should be treated as one highlighted region.

    Two spans qualify when either

    * they share at least one character, or
    * they touch end-to-start and cover identical substrings of ``text``
      (a repeated match such as ``"a"`` followed directly by ``"a"``).

    The result does not depend on the order of the two arguments.

    Args:
        position1: A ``(start, end)`` span, ``end`` exclusive.
        position2: Another ``(start, end)`` span.
        text: The string the spans index into; used only for the
            identical-substring comparison of touching spans.

    Returns:
        ``True`` if the spans overlap or are touching repeats, else ``False``.
    """
    # Order the spans so that `first` starts no later than `second`; the
    # checks below are only valid under that ordering.
    first, second = sorted((position1, position2))
    if first[1] > second[0]:
        return True
    return (
        first[1] == second[0]
        and text[first[0]:first[1]] == text[second[0]:second[1]]
    )
|
26
|
|
|
|
|
27
|
|
|
|
|
28
|
|
|
def merge_positions(position1, position2):
    """Return the smallest span that covers both given spans.

    Args:
        position1: A ``(start, end)`` span.
        position2: Another ``(start, end)`` span.

    Returns:
        A ``(start, end)`` tuple with the lower of the two starts and the
        higher of the two ends.
    """
    start = min(position1[0], position2[0])
    end = max(position1[1], position2[1])
    return (start, end)
|
30
|
|
|
|
|
31
|
|
|
|
|
32
|
|
|
def checkio(text, words):
    """Wrap every occurrence of the given words in ``<span>`` tags.

    Matching is case-insensitive, but the returned string keeps the
    original casing of ``text``. Matches that overlap, and directly
    adjacent matches of the same (case-insensitive) substring, are merged
    into a single ``<span>``.

    Args:
        text: The text to annotate.
        words: Search words separated by single spaces; empty entries
            produced by consecutive spaces are ignored by the matcher.

    Returns:
        ``text`` with ``<span>``/``</span>`` inserted around the merged
        matches.
    """
    # NOTE(review): spans are computed on the lowercased text and applied to
    # the original; this assumes lower() does not change the string length.
    lowered = text.lower()
    keywords = [word.lower() for word in words.split(' ')]
    positions = sorted(get_matched_positions(lowered, keywords))

    # Merge to a fixed point: after every merge the scan restarts, because a
    # merged span may now overlap (or repeat) one of its neighbours.
    merged_any = True
    while merged_any:
        merged_any = False
        for index, first in enumerate(positions[:-1]):
            for second in positions[index + 1:]:
                # Compare on the lowercased text so the "adjacent identical
                # match" rule is case-insensitive like the matching itself.
                if positions_overlapped(first, second, lowered):
                    positions.remove(first)
                    positions.remove(second)
                    positions.append(merge_positions(first, second))
                    positions.sort()
                    merged_any = True
                    break
            if merged_any:
                break

    # The remaining spans are sorted and disjoint, so the result can be
    # assembled in one left-to-right pass.
    pieces = []
    cursor = 0
    for start, end in positions:
        pieces.append(text[cursor:start])
        pieces.append('<span>' + text[start:end] + '</span>')
        cursor = end
    pieces.append(text[cursor:])
    return ''.join(pieces)
|
55
|
|
|
|
|
56
|
|
|
|
|
57
|
|
|
# These asserts are used only for self-checking and are not necessary for
# auto-testing.
if __name__ == '__main__':  # pragma: no cover
    assert (
        checkio("This is only a text example for task example.", "example")
        == "This is only a text <span>example</span> for task <span>example</span>."
    ), "Simple test"

    assert (
        checkio("Python is a widely used high-level programming language.", "pyThoN")
        == "<span>Python</span> is a widely used high-level programming language."
    ), "Ignore letters cases, but keep original"

    assert (
        checkio(
            "It is experiment for control groups with similar distributions.", "is im"
        )
        == "It <span>is</span> exper<span>im</span>ent for control groups with s<span>im</span>ilar d<span>is</span>tributions."
    ), "Several subwords"

    assert (
        checkio(
            "The National Aeronautics and Space Administration (NASA).", "nasa THE"
        )
        == "<span>The</span> National Aeronautics and Space Administration (<span>NASA</span>)."
    ), "two spaces"

    assert (
        checkio("Did you find anything?", "word space tree") == "Did you find anything?"
    ), "No comments"

    assert (
        checkio("Hello World! Or LOL", "hell world or lo")
        == "<span>Hello</span> <span>World</span>! <span>Or</span> <span>LO</span>L"
    ), "Contain or intersect"
|
92
|
|
|
|