SimpleComparable   A
last analyzed

Complexity

Total Complexity 1

Size/Duplication

Total Lines 15
Duplicated Lines 0 %

Importance

Changes 2
Bugs 0 Features 2
Metric Value
dl 0
loc 15
rs 10
c 2
b 0
f 2
wmc 1

1 Method

Rating   Name   Duplication   Size   Complexity  
A attributes() 0 4 1
1
"""Abstract base class and similarity functions."""
2
3
import logging
4
from collections import OrderedDict
5
from abc import ABCMeta, abstractmethod, abstractproperty  # pylint: disable=W0611
6
7
8
class _Base(object):  # pylint: disable=R0903

    """Shared base class."""

    def _repr(self, *args, **kwargs):
        """Return a __repr__ string from the arguments provided to __init__.

        Positional arguments are shown in the order given. Keyword
        arguments whose value is None are omitted and the remaining ones
        are listed sorted by name.

        @param args: list of arguments to __init__
        @param kwargs: dictionary of keyword arguments to __init__

        @return: __repr__ string

        """
        # Positional arguments first, in their original order
        pieces = [repr(arg) for arg in args]

        # Then the non-empty keyword arguments, sorted by keyword name
        # (names are unique, so the values never take part in the sort)
        pieces.extend(key + '=' + repr(value)
                      for key, value in sorted(kwargs.items())
                      if value is not None)

        name = self.__class__.__name__
        return "{}({})".format(name, ', '.join(pieces))
32
33
34
class Similarity(_Base):  # pylint: disable=R0903

    """Represents the similarity between two objects.

    A similarity wraps a float 'value' together with a 'threshold'.
    It converts to a float as its value and to a bool as whether the
    value meets the threshold. Arithmetic operators return a new
    Similarity that keeps this object's threshold; the in-place
    operators modify this object.

    """

    def __init__(self, value, threshold=1.0):
        """Store the value and threshold as floats.

        @param value: similarity ratio (anything accepted by float())
        @param threshold: minimum value for the similarity to be True

        """
        self.value = float(value)
        self.threshold = float(threshold)

    def __repr__(self):
        return self._repr(self.value, threshold=self.threshold)

    def __str__(self):
        return "{:.1%} similar".format(self.value)

    def __eq__(self, other):
        """Values are equal when they differ by less than 0.001."""
        try:
            return abs(float(self) - float(other)) < 0.001
        except (TypeError, ValueError):
            # Not convertible to a float: let Python try the reflected
            # comparison and fall back to identity instead of raising
            return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __lt__(self, other):
        return float(self) < float(other)

    def __le__(self, other):
        return self < other or self == other

    def __gt__(self, other):
        return float(self) > float(other)

    def __ge__(self, other):
        return self > other or self == other

    def __bool__(self):
        """In boolean scenarios, similarity is True if the threshold is met."""
        return self.value >= self.threshold

    def __float__(self):
        """In non-boolean scenarios, similarity is treated like a float."""
        return self.value

    def __add__(self, other):
        return Similarity(self.value + float(other), threshold=self.threshold)

    def __radd__(self, other):
        return Similarity(float(other) + self.value, threshold=self.threshold)

    def __iadd__(self, other):
        self.value += float(other)
        return self

    def __sub__(self, other):
        return Similarity(self.value - float(other), threshold=self.threshold)

    def __rsub__(self, other):
        return Similarity(float(other) - self.value, threshold=self.threshold)

    def __isub__(self, other):
        self.value -= float(other)
        return self

    def __mul__(self, other):
        return Similarity(self.value * float(other), threshold=self.threshold)

    def __rmul__(self, other):
        return Similarity(float(other) * self.value, threshold=self.threshold)

    def __imul__(self, other):
        self.value *= float(other)
        return self

    def __abs__(self):
        return Similarity(abs(self.value), threshold=self.threshold)

    def __round__(self, digits=None):
        """Round the value, keeping the threshold.

        @param digits: number of decimal digits (None rounds to a whole number)

        @return: new L{Similarity} with the rounded value

        """
        return Similarity(round(self.value, digits), threshold=self.threshold)
103
104
105
class _Indent(object):

    """Indent formatter for logging calls.

    The indent level is stored on the class itself, so it is shared by
    every caller.

    """

    level = 0  # current nesting depth (never negative)

    @classmethod
    def more(cls):
        """Increase the indent level."""
        cls.level += 1

    @classmethod
    def less(cls):
        """Decrease the indent level (it never drops below zero)."""
        cls.level = max(cls.level - 1, 0)

    @classmethod
    def indent(cls, fmt):
        """Get a new format string with indentation.

        @param fmt: format string to indent

        @return: 'fmt' prefixed with one '| ' per indent level

        """
        return '| ' * cls.level + fmt
125
126
127
def equal(obj1, obj2):
    """Calculate equality between two (Comparable) objects.

    The comparison is logged before it starts and again with its result.

    @param obj1: first object to compare (its 'equality' method is used)
    @param obj2: second object to compare

    @return: result of obj1.equality(obj2)

    """
    Comparable.log(obj1, obj2, '==')
    equality = obj1.equality(obj2)
    Comparable.log(obj1, obj2, '==', result=equality)
    return equality
133
134
135
def similar(obj1, obj2):
    """Calculate similarity between two (Comparable) objects.

    The comparison is logged before it starts and again with its result.

    @param obj1: first object to compare (its 'similarity' method is used)
    @param obj2: second object to compare

    @return: result of obj1.similarity(obj2)

    """
    Comparable.log(obj1, obj2, '%')
    similarity = obj1.similarity(obj2)
    Comparable.log(obj1, obj2, '%', result=similarity)
    return similarity
141
142
143
class Comparable(_Base, metaclass=ABCMeta):

    """Abstract Base Class for objects that are comparable.

    Subclasses directly comparable must override the 'equality' and
    'similarity' methods to return a bool and 'Similarity' object,
    respectively.

    Subclasses comparable by attributes must override the
    'attributes' property to define which (Comparable) attributes
    should be considered.

    Both types of subclasses may also override the 'threshold'
    attribute to change the default similarity threshold.

    """

    def __eq__(self, other):
        """Map the '==' operator to be a shortcut for "equality"."""
        return equal(self, other)

    def __ne__(self, other):
        """Map the '!=' operator to the negation of '=='."""
        return not self == other

    def __mod__(self, other):
        """Map the '%' operator to be a shortcut for "similarity"."""
        return similar(self, other)

    @property
    @abstractmethod
    def attributes(self):  # pragma: no cover, abstract
        """Get an attribute {name: weight} dictionary for comparisons."""
        return {}

    threshold = 1.0  # ratio for two objects to be considered "similar"

    @abstractmethod
    def equality(self, other):
        """Compare two objects for equality.

        This default implementation compares, with '==', every attribute
        named by the 'attributes' property and stops at the first
        difference. An attribute missing on either object makes the
        objects unequal.

        @param self: first object to compare
        @param other: second object to compare

        @return: boolean result of comparison

        """
        # Compare specified attributes for equality
        cname = self.__class__.__name__
        for aname in self.attributes:
            try:
                attr1 = getattr(self, aname)
                attr2 = getattr(other, aname)
            except AttributeError as error:
                logging.debug("%s.%s: %s", cname, aname, error)
                return False
            self.log(attr1, attr2, '==', cname=cname, aname=aname)
            eql = (attr1 == attr2)
            self.log(attr1, attr2, '==', cname=cname, aname=aname, result=eql)
            if not eql:
                return False

        return True

    @abstractmethod
    def similarity(self, other):
        """Compare two objects for similarity.

        This default implementation combines the similarity of every
        attribute named by the 'attributes' property, weighted by the
        attribute's weight and scaled by the total weight considered.

        @param self: first object to compare
        @param other: second object to compare

        @return: L{Similarity} result of comparison

        """
        sim = self.Similarity()
        total = 0.0

        # Calculate similarity ratio for each attribute
        cname = self.__class__.__name__
        for aname, weight in self.attributes.items():

            attr1 = getattr(self, aname, None)
            attr2 = getattr(other, aname, None)
            self.log(attr1, attr2, '%', cname=cname, aname=aname)

            # Similarity is ignored if None on both objects
            if attr1 is None and attr2 is None:
                self.log(attr1, attr2, '%', cname=cname, aname=aname,
                         result="attributes are both None")
                continue

            # Similarity is 0 if either attribute is non-Comparable
            # (the weight still counts toward the total)
            if not all((isinstance(attr1, Comparable),
                        isinstance(attr2, Comparable))):
                self.log(attr1, attr2, '%', cname=cname, aname=aname,
                         result="attributes not Comparable")
                total += weight
                continue

            # Calculate similarity between the attributes
            attr_sim = (attr1 % attr2)
            self.log(attr1, attr2, '%', cname=cname, aname=aname,
                     result=attr_sim)

            # Add the similarity to the total
            sim += attr_sim * weight
            total += weight

        # Scale the similarity so the total is 1.0
        if total:
            sim *= (1.0 / total)

        return sim

    def Similarity(self, value=None):  # pylint: disable=C0103
        """Constructor for new default Similarities.

        @param value: initial similarity value (None means 0.0)

        @return: L{Similarity} using this object's threshold

        """
        if value is None:
            value = 0.0
        return Similarity(value, threshold=self.threshold)

    @staticmethod
    def log(obj1, obj2, sym, cname=None, aname=None, result=None):  # pylint: disable=R0913
        """Log the objects being compared and the result.

        When no result object is specified, subsequent calls will have an
        increased indentation level. The indentation level is decreased
        once a result object is provided.

        @param obj1: first object
        @param obj2: second object
        @param sym: operation being performed ('==' or '%')
        @param cname: name of class (when attributes are being compared)
        @param aname: name of attribute (when attributes are being compared)
        @param result: outcome of comparison

        """
        fmt = "{o1} {sym} {o2} : {r}"
        if cname or aname:
            assert cname and aname  # both must be specified
            fmt = "{c}.{a}: " + fmt

        if result is None:
            # Start of a comparison: log at the current level, then nest
            result = '...'
            fmt = _Indent.indent(fmt)
            _Indent.more()
        else:
            # End of a comparison: un-nest first so the result lines up
            # with the matching start message
            _Indent.less()
            fmt = _Indent.indent(fmt)

        msg = fmt.format(o1=repr(obj1), o2=repr(obj2),
                         c=cname, a=aname, sym=sym, r=result)
        logging.info(msg)
293
294
295
class SimpleComparable(Comparable):  # pylint: disable=W0223

    """Abstract Base Class for objects that are directly comparable.

    Subclasses directly comparable must override the 'equality' and
    'similarity' methods to return a bool and 'Similarity' object,
    respectively. They may also override the 'threshold' attribute
    to change the default similarity threshold.

    """

    @property
    def attributes(self):  # pragma: no cover, abstract
        """A simple comparable does not use the attributes property."""
        raise AttributeError(
            "a simple comparable does not use the 'attributes' property")
310
311
312
class CompoundComparable(Comparable):  # pylint: disable=W0223

    """Abstract Base Class for objects that are comparable by attributes.

    Subclasses comparable by attributes must override the
    'attributes' property to define which (Comparable) attributes
    should be considered. They may also override the 'threshold'
    attribute to change the default similarity threshold.

    """

    def equality(self, other):
        """A compound comparable's equality is based on attributes.

        @param other: object to compare against

        @return: result of the inherited attribute-by-attribute comparison

        """
        return super().equality(other)

    def similarity(self, other):
        """A compound comparable's similarity is based on attributes.

        @param other: object to compare against

        @return: result of the inherited weighted attribute comparison

        """
        return super().similarity(other)
330