Text   A
last analyzed

Complexity

Total Complexity 2

Size/Duplication

Total Lines 15
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
dl 0
loc 15
rs 10
c 1
b 0
f 1
wmc 2

2 Methods

Rating   Name   Duplication   Size   Complexity  
A equality() 0 3 1
A similarity() 0 5 1
1
"""Class definitions for simple comparable types."""
2
3
import logging
4
from difflib import SequenceMatcher
5
6
from comparable.base import SimpleComparable
7
8
9
class _Simple(SimpleComparable):  # pylint: disable=W0223

    """Base for comparable types that wrap one plain value.

    The wrapped object is kept on ``self.value`` and the conversion magic
    methods below simply delegate to it.  This class does not define
    ``equality`` or ``similarity`` itself; the concrete subclasses do.
    """

    def __init__(self, value):
        """Store the wrapped value unchanged."""
        self.value = value

    def __str__(self):
        """Return the string form of the wrapped value."""
        return str(self.value)

    def __repr__(self):
        """Return a representation built by the ``_repr`` helper.

        ``_repr`` is not defined in this class; it is presumably provided
        by ``SimpleComparable``.
        """
        return self._repr(self.value)

    def __bool__(self):
        """Return the truthiness of the wrapped value."""
        return bool(self.value)

    def __float__(self):
        """Return the wrapped value converted to ``float``."""
        return float(self.value)
27
28
29
class Number(_Simple):

    """Comparable number; negative values are rejected on construction."""

    threshold = 0.999  # 99.9% similar

    def __init__(self, value):
        """Store the value, then raise ``ValueError`` if it is below zero."""
        super().__init__(value)
        if value < 0:
            raise ValueError("Number objects can only be positive")

    def equality(self, other):
        """Return True when both objects convert to the same float."""
        mine = float(self)
        theirs = float(other)
        return mine == theirs

    def similarity(self, other):
        """Return the smaller value divided by the larger as a Similarity.

        When the larger value is zero the division fails; in that case
        the ratio is 1.0 if the smaller value is also zero, else 0.0.
        """
        smaller, larger = sorted((self.value, other.value))
        try:
            ratio = float(smaller) / larger
        except ZeroDivisionError:
            # Division by zero: two zeros count as a perfect match.
            if smaller:
                ratio = 0.0
            else:
                ratio = 1.0
        return self.Similarity(ratio)
53
54
55
class Text(_Simple):

    """Comparable free-form text."""

    threshold = 0.83  # "Hello, world!" ~ "hello world"

    def equality(self, other):
        """Return True when both objects have the same string form."""
        mine = str(self)
        theirs = str(other)
        return mine == theirs

    def similarity(self, other):
        """Return the SequenceMatcher ratio of the two raw values.

        The ratio is wrapped in ``self.Similarity`` before being returned.
        """
        matcher = SequenceMatcher(a=self.value, b=other.value)
        return self.Similarity(matcher.ratio())
70
71
72
class TextEnum(Text):

    """Comparable textual enumeration whose similarity ignores case."""

    threshold = 1.0  # enumerations must match

    def similarity(self, other):
        """Return an all-or-nothing similarity (1.0 or 0.0).

        The lowercased string forms of both objects are compared; the
        result is 1.0 when they are identical and 0.0 otherwise.
        """
        mine = str(self).lower()
        theirs = str(other).lower()
        if mine == theirs:
            ratio = 1.0
        else:
            ratio = 0.0
        return self.Similarity(ratio)
83
84
85
class TextTitle(Text):

    """Comparable case-insensitive textual titles.

    Each title is normalized once at construction time (see ``_strip``)
    and the normalized form, kept on ``self.stripped``, is what
    ``similarity`` compares.
    """

    threshold = 0.93  # "The Cat and the Hat" ~ "cat an' the hat"

    ARTICLES = 'a', 'an', 'the'  # stripped from the front
    JOINERS = '&', '+'  # replaced with 'and'

    def __init__(self, value):
        """Store the raw title and precompute its normalized form."""
        super().__init__(value)
        self.stripped = self._strip(self.value)
        logging.debug("stripped %r to %r", self.value, self.stripped)

    @staticmethod
    def _strip(text):
        """Strip articles/whitespace and remove case.

        Steps, in order: trim surrounding whitespace, collapse every run
        of spaces to a single space, lowercase, replace each joiner with
        'and', then drop at most one leading article.

        :param text: raw title string
        :return: normalized title string
        """
        text = text.strip()
        # A single str.replace pass is non-overlapping, so a run of three
        # or more spaces would still leave a double space behind (and,
        # after "The   Cat", a leading space once the article is removed).
        # Repeat until every run has collapsed to one space.
        while '  ' in text:
            text = text.replace('  ', ' ')
        text = text.lower()
        for joiner in TextTitle.JOINERS:
            text = text.replace(joiner, 'and')
        for article in TextTitle.ARTICLES:
            # Require the trailing space so that "another" or "theory"
            # is not mistaken for an article.
            if text.startswith(article + ' '):
                text = text[len(article) + 1:]
                break  # only the first leading article is removed
        return text

    def similarity(self, other):
        """Get similarity as a ratio of the stripped text.

        :param other: object exposing a ``stripped`` attribute, such as
            another ``TextTitle``
        :return: ``self.Similarity`` wrapping the SequenceMatcher ratio
        """
        logging.debug("comparing %r and %r...", self.stripped, other.stripped)
        ratio = SequenceMatcher(a=self.stripped, b=other.stripped).ratio()
        return self.Similarity(ratio)
119