keywords_finder.checkio() - Code Metrics - Inspection of "feat(Storage): Keywords Finder" - KenMercusLai/checkio - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 9e6bed...ccd378 )

by Ken M.

created 2019-03-06 08:54 UTC

keywords_finder.checkio() B

↳ Parent: keywords_finder

Complexity

Conditions

Size

Total Lines	23
Code Lines	22

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	7
eloc	22
nop	2
dl	0
loc	23
rs	7.952
c	0
b	0
f	0

from itertools import product


def get_matched_positions(text, keywords):
    """Locate every non-overlapping occurrence of each keyword in *text*.

    Keywords are processed in the order given; empty keywords are ignored.
    For each keyword the scan resumes right after the previous hit, so
    occurrences of the same keyword never overlap one another.

    Returns a list of ``(start, end)`` tuples (end exclusive), grouped by
    keyword in input order and ascending within each group.
    """
    spans = []
    for keyword in keywords:
        if not keyword:
            # Empty strings (e.g. produced by consecutive separators) match
            # everywhere, so they are skipped.
            continue
        width = len(keyword)
        start = text.find(keyword)
        while start != -1:
            stop = start + width
            spans.append((start, stop))
            # Continue searching after the end of the current hit.
            start = text.find(keyword, stop)
    return spans


def positions_overlapped(position1, position2, text):
    """Tell whether two ``(start, end)`` spans should be merged.

    The spans are merged when *position1* ends after *position2* starts, or
    when they touch (``position1`` ends exactly where ``position2`` starts)
    and cover the same text.

    The text comparison ignores letter case, so it gives the same answer
    whether *text* is the original string or a lowercased copy used for
    matching.

    The check is one-directional: only the end of *position1* is compared
    with the start of *position2*, so pass the earlier span first.

    Returns ``True`` if the spans should be merged, ``False`` otherwise.
    """
    end1 = position1[1]
    start2 = position2[0]
    if end1 > start2:
        return True
    if end1 != start2:
        return False
    # Touching spans: merge only when both cover the same (case-folded) text.
    first = text[position1[0] : end1].lower()
    second = text[start2 : position2[1]].lower()
    return first == second


def merge_positions(position1, position2):
    """Return the smallest ``(start, end)`` span covering both positions."""
    starts = (position1[0], position2[0])
    ends = (position1[1], position2[1])
    return (min(starts), max(ends))


def checkio(text, words):
    """Wrap keyword occurrences in *text* with ``<span>`` tags.

    *words* is a string of keywords separated by single spaces. Matching is
    done on lowercased copies of the text and keywords, while the returned
    string keeps the original letter case. Spans that
    ``positions_overlapped`` reports as mergeable are combined into one
    before the tags are inserted.

    NOTE(review): offsets are computed on ``text.lower()`` and applied to
    ``text``; this assumes lowercasing does not change the string length.
    """
    lowered_keywords = [word.lower() for word in words.split(' ')]
    spans = sorted(get_matched_positions(text.lower(), lowered_keywords))

    # Merge the first mergeable pair (in sorted order), re-sort, and start
    # over; stop once a full scan finds nothing left to merge.
    while True:
        pair = next(
            (
                (first, second)
                for first in range(len(spans) - 1)
                for second in range(first + 1, len(spans))
                if positions_overlapped(spans[first], spans[second], text)
            ),
            None,
        )
        if pair is None:
            break
        first, second = pair
        merged = merge_positions(spans[first], spans[second])
        # Delete the higher index first so the lower one stays valid.
        del spans[second]
        del spans[first]
        spans.append(merged)
        spans.sort()

    # Insert tags from right to left so earlier offsets remain valid.
    for start, end in reversed(spans):
        text = text[:start] + '<span>' + text[start:end] + '</span>' + text[end:]
    return text


# These "asserts" are used only for self-checking and are not necessary for
# auto-testing.
if __name__ == '__main__':  # pragma: no cover
    # Each entry: (text, keywords, expected markup, message shown on failure).
    self_checks = [
        (
            "This is only a text example for task example.",
            "example",
            "This is only a text <span>example</span> for task <span>example</span>.",
            "Simple test",
        ),
        (
            "Python is a widely used high-level programming language.",
            "pyThoN",
            "<span>Python</span> is a widely used high-level programming language.",
            "Ignore letters cases, but keep original",
        ),
        (
            "It is experiment for control groups with similar distributions.",
            "is im",
            "It <span>is</span> exper<span>im</span>ent for control groups with s<span>im</span>ilar d<span>is</span>tributions.",
            "Several subwords",
        ),
        (
            "The National Aeronautics and Space Administration (NASA).",
            "nasa  THE",
            "<span>The</span> National Aeronautics and Space Administration (<span>NASA</span>).",
            "two spaces",
        ),
        (
            "Did you find anything?",
            "word space tree",
            "Did you find anything?",
            "No comments",
        ),
        (
            "Hello World! Or LOL",
            "hell world or lo",
            "<span>Hello</span> <span>World</span>! <span>Or</span> <span>LO</span>L",
            "Contain or intersect",
        ),
    ]
    # Checks run in order; the first mismatch raises with its message.
    for sample, keywords, expected, label in self_checks:
        assert checkio(sample, keywords) == expected, label


1			from itertools import product
2
3
4			def get_matched_positions(text, keywords):
5			results = []
6			for i in keywords:
7			if i:
8			index = 0
9			while index < len(text):
10			index = text.find(i, index)
11			if index == -1:
12			break
13			results.append((index, index + len(i)))
14			index += len(i)
15			return results
16
17
18			def positions_overlapped(position1, position2, text):
19			return any(
20			[
21			position1[1] > position2[0],
22			position1[1] == position2[0]
23			and text[position1[0] : position1[1]] == text[position2[0] : position2[1]],
24			]
25			)
26
27
28			def merge_positions(position1, position2):
29			return (min(position1[0], position2[0]), max(position1[1], position2[1]))
30
31
32			def checkio(text, words):
33			matched_positions = get_matched_positions(
34			text.lower(), [i.lower() for i in words.split(' ')]
35			)
36			positions_updated = True
37			sorted_positions = sorted(matched_positions)
38			while positions_updated:
39			positions_updated = False
40			for index, value in enumerate(sorted_positions[:-1]):
41			for i in product([value], sorted_positions[index + 1 :]):
42			if positions_overlapped(i[0], i[1], text):
43			sorted_positions.remove(i[0])
44			sorted_positions.remove(i[1])
45			sorted_positions.append(merge_positions(*i))
46			sorted_positions = sorted(sorted_positions)
47			positions_updated = True
48			break
49			if positions_updated:
50			break
51			reversed_positions = sorted(sorted_positions, reverse=True)
52			for i in reversed_positions:
53			text = text[: i[0]] + '<span>' + text[i[0] : i[1]] + '</span>' + text[i[1] :]
54			return text
55
56
57			# These "asserts" using only for self-checking and not necessary for
58			# auto-testing
59			if __name__ == '__main__': # pragma: no cover
60			assert (
61			checkio("This is only a text example for task example.", "example")
62			== "This is only a text <span>example</span> for task <span>example</span>."
63			), "Simple test"
64
65			assert (
66			checkio("Python is a widely used high-level programming language.", "pyThoN")
67			== "<span>Python</span> is a widely used high-level programming language."
68			), "Ignore letters cases, but keep original"
69
70			assert (
71			checkio(
72			"It is experiment for control groups with similar distributions.", "is im"
73			)
74			== "It <span>is</span> exper<span>im</span>ent for control groups with s<span>im</span>ilar d<span>is</span>tributions."
75			), "Several subwords"
76
77			assert (
78			checkio(
79			"The National Aeronautics and Space Administration (NASA).", "nasa THE"
80			)
81			== "<span>The</span> National Aeronautics and Space Administration (<span>NASA</span>)."
82			), "two spaces"
83
84			assert (
85			checkio("Did you find anything?", "word space tree") == "Did you find anything?"
86			), "No comments"
87
88			assert (
89			checkio("Hello World! Or LOL", "hell world or lo")
90			== "<span>Hello</span> <span>World</span>! <span>Or</span> <span>LO</span>L"
91			), "Contain or intersect"
92

KenMercusLai / checkio

Push — master ( 9e6bed...ccd378 )

keywords_finder.checkio() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like