Completed
Push — master ( f43547...71985b )
by Chris
12:00 queued 10s
created

abydos.distance._minkowski.Minkowski.dist_abs()   B

Complexity

Conditions 8

Size

Total Lines 63
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 8

Importance

Changes 0
Metric Value
cc 8
eloc 20
nop 7
dl 0
loc 63
ccs 18
cts 18
cp 1
crap 8
rs 7.3333
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._minkowski.
20
21
Minkowski distance & similarity
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from numbers import Number
32
33 1
from ._token_distance import _TokenDistance
34
35 1
# Public names of this module: the names bound by ``from <module> import *``.
__all__ = ['Minkowski', 'dist_minkowski', 'minkowski', 'sim_minkowski']
36
37
38 1
class Minkowski(_TokenDistance):
    """Minkowski distance.

    The Minkowski distance :cite:`Minkowski:1910` is a distance metric in
    :math:`L^p`-space.
    """

    @staticmethod
    def _lp_norm(counts, pval):
        """Return the :math:`L^p` "norm" of a collection of counts.

        Parameters
        ----------
        counts : collection of int
            The q-gram counts to aggregate; must be non-empty when pval is
            infinite
        pval : int or float
            The :math:`p`-value of the :math:`L^p`-space

        Returns
        -------
        int or float
            The largest count if pval is infinite, the number of counts if
            pval is 0, and the p-th root of the sum of p-th powers otherwise

        """
        if pval == float('inf'):
            # Chebyshev norm: the limit of the L^p norm as p grows. The
            # generic formula below cannot be used here, because
            # x ** (1 / inf) == x ** 0.0 == 1.0 for every x.
            return max(counts)
        if pval == 0:
            # This is the l_0 "norm" as developed by David Donoho
            return len(counts)
        return sum(_ ** pval for _ in counts) ** (1.0 / pval)

    def dist_abs(
        self, src, tar, qval=2, pval=1, normalized=False, alphabet=None
    ):
        """Return the Minkowski distance (:math:`L^p`-norm) of two strings.

        Parameters
        ----------
        src : str
            Source string (or QGrams/Counter objects) for comparison
        tar : str
            Target string (or QGrams/Counter objects) for comparison
        qval : int
            The length of each q-gram; 0 for non-q-gram version
        pval : int or float
            The :math:`p`-value of the :math:`L^p`-space; ``float('inf')``
            selects the Chebyshev distance and 0 the :math:`l_0` "norm"
        normalized : bool
            Normalizes to [0, 1] if True
        alphabet : collection or int
            The values or size of the alphabet; if given, it is used as the
            normalizer instead of the norm of the combined q-gram counts

        Returns
        -------
        float
            The Minkowski distance

        Examples
        --------
        >>> cmp = Minkowski()
        >>> cmp.dist_abs('cat', 'hat')
        4.0
        >>> cmp.dist_abs('Niall', 'Neil')
        7.0
        >>> cmp.dist_abs('Colin', 'Cuilen')
        9.0
        >>> cmp.dist_abs('ATCG', 'TAGC')
        10.0

        """
        q_src, q_tar = self._get_qgrams(src, tar, qval)
        # Counts of the q-grams that occur more often in one input than in
        # the other (the symmetric difference of the two multisets).
        diffs = ((q_src - q_tar) + (q_tar - q_src)).values()
        if not diffs:
            # Nothing differs, so the distance is 0 whatever the normalizer;
            # returning early also keeps the norm helper away from empty
            # input.
            return 0.0

        normalizer = 1
        if normalized:
            if alphabet is not None:
                # noinspection PyTypeChecker
                normalizer = (
                    alphabet if isinstance(alphabet, Number) else len(alphabet)
                )
            else:
                # Norm of the combined counts: every difference is bounded
                # by the matching total, so the quotient stays in [0, 1].
                normalizer = self._lp_norm((q_src + q_tar).values(), pval)

        # True division: the module imports ``division`` from ``__future__``.
        return self._lp_norm(diffs, pval) / normalizer

    def dist(self, src, tar, qval=2, pval=1, alphabet=None):
        """Return normalized Minkowski distance of two strings.

        The normalized Minkowski distance :cite:`Minkowski:1910` is a distance
        metric in :math:`L^p`-space, normalized to [0, 1].

        Parameters
        ----------
        src : str
            Source string (or QGrams/Counter objects) for comparison
        tar : str
            Target string (or QGrams/Counter objects) for comparison
        qval : int
            The length of each q-gram; 0 for non-q-gram version
        pval : int or float
            The :math:`p`-value of the :math:`L^p`-space
        alphabet : collection or int
            The values or size of the alphabet

        Returns
        -------
        float
            The normalized Minkowski distance

        Examples
        --------
        >>> cmp = Minkowski()
        >>> cmp.dist('cat', 'hat')
        0.5
        >>> round(cmp.dist('Niall', 'Neil'), 12)
        0.636363636364
        >>> round(cmp.dist('Colin', 'Cuilen'), 12)
        0.692307692308
        >>> cmp.dist('ATCG', 'TAGC')
        1.0

        """
        # Same computation as dist_abs, with normalization switched on.
        return self.dist_abs(src, tar, qval, pval, True, alphabet)
147
148
149 1
def minkowski(src, tar, qval=2, pval=1, normalized=False, alphabet=None):
    """Compute the Minkowski distance (:math:`L^p`-norm) of two strings.

    Convenience function that builds a :py:class:`Minkowski` instance and
    delegates to :py:meth:`Minkowski.dist_abs`.

    Parameters
    ----------
    src : str
        Source string (or QGrams/Counter objects) for comparison
    tar : str
        Target string (or QGrams/Counter objects) for comparison
    qval : int
        The length of each q-gram; 0 for non-q-gram version
    pval : int or float
        The :math:`p`-value of the :math:`L^p`-space
    normalized : bool
        Normalizes to [0, 1] if True
    alphabet : collection or int
        The values or size of the alphabet

    Returns
    -------
    float
        The Minkowski distance

    Examples
    --------
    >>> minkowski('cat', 'hat')
    4.0
    >>> minkowski('Niall', 'Neil')
    7.0
    >>> minkowski('Colin', 'Cuilen')
    9.0
    >>> minkowski('ATCG', 'TAGC')
    10.0

    """
    measure = Minkowski()
    return measure.dist_abs(src, tar, qval, pval, normalized, alphabet)
187
188
189 1
def dist_minkowski(src, tar, qval=2, pval=1, alphabet=None):
    """Compute the normalized Minkowski distance of two strings.

    Convenience function that builds a :py:class:`Minkowski` instance and
    delegates to :py:meth:`Minkowski.dist`.

    Parameters
    ----------
    src : str
        Source string (or QGrams/Counter objects) for comparison
    tar : str
        Target string (or QGrams/Counter objects) for comparison
    qval : int
        The length of each q-gram; 0 for non-q-gram version
    pval : int or float
        The :math:`p`-value of the :math:`L^p`-space
    alphabet : collection or int
        The values or size of the alphabet

    Returns
    -------
    float
        The normalized Minkowski distance

    Examples
    --------
    >>> dist_minkowski('cat', 'hat')
    0.5
    >>> round(dist_minkowski('Niall', 'Neil'), 12)
    0.636363636364
    >>> round(dist_minkowski('Colin', 'Cuilen'), 12)
    0.692307692308
    >>> dist_minkowski('ATCG', 'TAGC')
    1.0

    """
    measure = Minkowski()
    return measure.dist(src, tar, qval, pval, alphabet)
225
226
227 1
def sim_minkowski(src, tar, qval=2, pval=1, alphabet=None):
    """Compute the normalized Minkowski similarity of two strings.

    Convenience function that builds a :py:class:`Minkowski` instance and
    delegates to :py:meth:`Minkowski.sim`.

    Parameters
    ----------
    src : str
        Source string (or QGrams/Counter objects) for comparison
    tar : str
        Target string (or QGrams/Counter objects) for comparison
    qval : int
        The length of each q-gram; 0 for non-q-gram version
    pval : int or float
        The :math:`p`-value of the :math:`L^p`-space
    alphabet : collection or int
        The values or size of the alphabet

    Returns
    -------
    float
        The normalized Minkowski similarity

    Examples
    --------
    >>> sim_minkowski('cat', 'hat')
    0.5
    >>> round(sim_minkowski('Niall', 'Neil'), 12)
    0.363636363636
    >>> round(sim_minkowski('Colin', 'Cuilen'), 12)
    0.307692307692
    >>> sim_minkowski('ATCG', 'TAGC')
    0.0

    """
    measure = Minkowski()
    # Arguments stay positional: ``sim`` is not defined in this module, so
    # its parameter names are not visible here.
    return measure.sim(src, tar, qval, pval, alphabet)
263
264
265
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    import doctest

    doctest.testmod()
269