Completed
Push — master ( 643512...2b6b3e )
by Chris
20:40 queued 10:36
created

abydos.distance._millar.Millar.dist_abs()   B

Complexity

Conditions 5

Size

Total Lines 55
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 5

Importance

Changes 0
Metric Value
eloc 19
dl 0
loc 55
ccs 19
cts 19
cp 1
rs 8.9833
c 0
b 0
f 0
cc 5
nop 3
crap 5

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

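Commonly applied refactorings include Extract Method and, when many parameters or temporary variables are involved, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.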

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._millar.
20
21
Millar's binomial deviance dissimilarity
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from math import log
32
33 1
from ._token_distance import _TokenDistance
34
35 1
__all__ = ['Millar']
36
37
38 1
class Millar(_TokenDistance):
    r"""Millar's binomial deviance dissimilarity.

    For two sets X and Y drawn from a population S, Millar's binomial deviance
    dissimilarity :cite:`Anderson:2004` is:

        .. math::

            dist_{Millar}(X, Y) = \sum_{i=0}^{|S|} \frac{1}{x_i+y_i}
            \bigg\{x_i \log(\frac{x_i}{x_i+y_i}) +
            y_i \log(\frac{y_i}{x_i+y_i})
            - (x_i+y_i) \log(\frac{1}{2})\bigg\}

    A term whose count (:math:`x_i` or :math:`y_i`) is zero is taken to be
    zero, and a token whose combined count is zero contributes nothing.

    .. versionadded:: 0.4.1
    """

    # log(2) is needed for every token, so it is evaluated only once.
    _LOG2 = log(2)

    def __init__(self, **kwargs):
        """Initialize Millar instance.

        Parameters
        ----------
        **kwargs
            Arbitrary keyword arguments, forwarded unchanged to the parent
            class initializer


        .. versionadded:: 0.4.1

        """
        super(Millar, self).__init__(**kwargs)

    @staticmethod
    def _token_deviance(src_count, tar_count):
        """Return a single token's contribution to the binomial deviance.

        Parameters
        ----------
        src_count : int or float
            Count of the token in the source
        tar_count : int or float
            Count of the token in the target

        Returns
        -------
        float
            The token's term of the Millar sum; 0.0 when both counts are zero

        """
        total = src_count + tar_count
        if not total:
            # Nothing was observed for this token; skipping it also avoids a
            # division by zero below.
            return 0.0

        # x * log(x / n) is taken as 0 for x == 0, so log() is only evaluated
        # on strictly non-zero counts.
        src_val = src_count * log(src_count / total) if src_count else 0
        tar_val = tar_count * log(tar_count / total) if tar_count else 0

        # Same expression order as the inline original, so results match it.
        return (src_val + tar_val + total * Millar._LOG2) / total

    def dist_abs(self, src, tar):
        """Return Millar's binomial deviance dissimilarity of two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Millar's binomial deviance dissimilarity

        Examples
        --------
        >>> cmp = Millar()
        >>> cmp.dist_abs('cat', 'hat')
        2.772588722239781
        >>> cmp.dist_abs('Niall', 'Neil')
        4.852030263919617
        >>> cmp.dist_abs('aluminum', 'Catalan')
        9.704060527839234
        >>> cmp.dist_abs('ATCG', 'TAGC')
        6.931471805599453


        .. versionadded:: 0.4.1

        """
        self._tokenize(src, tar)

        src_tok = self._src_tokens
        tar_tok = self._tar_tokens

        # NOTE(review): indexing a token that occurs on only one side relies
        # on the token mappings returning 0 for absent keys (Counter-like
        # behaviour), exactly as the previous implementation did -- confirm
        # against _TokenDistance.
        score = 0
        for tok in set(src_tok) | set(tar_tok):
            score += self._token_deviance(src_tok[tok], tar_tok[tok])

        # Clamp so that an empty alphabet, or a sum that is not strictly
        # positive, is reported as exactly 0.0.
        return score if score > 0 else 0.0

    def sim(self, *args, **kwargs):
        """Raise exception when called.

        Parameters
        ----------
        *args
            Variable length argument list
        **kwargs
            Arbitrary keyword arguments

        Raises
        ------
        NotImplementedError
            Method disabled for Millar dissimilarity.


        .. versionadded:: 0.3.6

        """
        raise NotImplementedError('Method disabled for Millar dissimilarity.')

    def dist(self, *args, **kwargs):
        """Raise exception when called.

        Parameters
        ----------
        *args
            Variable length argument list
        **kwargs
            Arbitrary keyword arguments

        Raises
        ------
        NotImplementedError
            Method disabled for Millar dissimilarity.


        .. versionadded:: 0.3.6

        """
        raise NotImplementedError('Method disabled for Millar dissimilarity.')
165
166
167
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module's
    # docstrings.
    from doctest import testmod

    testmod()
171