Completed
Pull Request — master (#225)
by Chris
09:15
created

abydos.distance._gotoh.Gotoh.sim_score()   B

Complexity

Conditions 5

Size

Total Lines 72
Code Lines 32

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 25
CRAP Score 5

Importance

Changes 0
Metric Value
eloc 32
dl 0
loc 72
ccs 25
cts 25
cp 1
rs 8.6453
c 0
b 0
f 0
cc 5
nop 3
crap 5

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

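Commonly applied refactorings include Extract Method and, when the method carries many parameters or temporary variables, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.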

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._gotoh.
20
21
Gotoh score
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from deprecation import deprecated
32
33 1
from numpy import float32 as np_float32
34 1
from numpy import zeros as np_zeros
35
36 1
from six.moves import range
37
38 1
from ._ident import sim_ident
39 1
from ._needleman_wunsch import NeedlemanWunsch
40 1
from .. import __version__
41
42 1
__all__ = ['Gotoh', 'gotoh']
43
44
45 1
class Gotoh(NeedlemanWunsch):
    """Gotoh score.

    The Gotoh score :cite:`Gotoh:1982` is essentially Needleman-Wunsch with
    affine gap penalties.

    .. versionadded:: 0.3.6
    """

    def __init__(self, gap_open=1, gap_ext=0.4, sim_func=None, **kwargs):
        """Initialize Gotoh instance.

        Parameters
        ----------
        gap_open : float
            The cost of an open alignment gap (1 by default)
        gap_ext : float
            The cost of an alignment gap extension (0.4 by default)
        sim_func : function
            A function that returns the similarity of two characters
            (:py:meth:`NeedlemanWunsch.sim_matrix` by default)
        **kwargs
            Arbitrary keyword arguments, forwarded to the parent initializer


        .. versionadded:: 0.4.0

        """
        super(Gotoh, self).__init__(**kwargs)
        self._gap_open = gap_open
        self._gap_ext = gap_ext
        if sim_func is None:
            sim_func = NeedlemanWunsch.sim_matrix
        self._sim_func = sim_func

    def sim_score(self, src, tar):
        """Return the Gotoh score of two strings.

        Three score matrices of shape ``(len(src) + 1, len(tar) + 1)`` are
        filled: ``d_mat`` for alignments ending in a character pair,
        ``p_mat`` for alignments ending in a gap that consumes a character of
        ``src``, and ``q_mat`` for alignments ending in a gap that consumes a
        character of ``tar``. The score is the best value in the final cell
        of the three matrices.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Gotoh score. If exactly one of the strings is empty, this is the
            (negative) cost of a single gap spanning the other string; if
            both are empty, it is 0.

        Examples
        --------
        >>> cmp = Gotoh()
        >>> cmp.sim_score('cat', 'hat')
        2.0
        >>> cmp.sim_score('Niall', 'Neil')
        1.0
        >>> round(cmp.sim_score('aluminum', 'Catalan'), 12)
        -0.4
        >>> cmp.sim_score('cat', 'hat')
        2.0


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        src_len = len(src)
        tar_len = len(tar)
        shape = (src_len + 1, tar_len + 1)
        neg_inf = float('-inf')
        gap_open = self._gap_open
        gap_ext = self._gap_ext

        d_mat = np_zeros(shape, dtype=np_float32)
        p_mat = np_zeros(shape, dtype=np_float32)
        q_mat = np_zeros(shape, dtype=np_float32)

        # Borders: only a pure gap can align a prefix with the empty string.
        #
        # Earlier revisions also pre-seeded q_mat[i, 1] and p_mat[1, j] here.
        # Those cells are always recomputed by the main loop before they are
        # read, so the stores had no effect on the result, but they indexed
        # out of bounds (IndexError) whenever exactly one input was empty.
        d_mat[0, 0] = 0
        p_mat[0, 0] = neg_inf
        q_mat[0, 0] = neg_inf
        for i in range(1, src_len + 1):
            d_mat[i, 0] = neg_inf
            p_mat[i, 0] = -gap_open - gap_ext * (i - 1)
            q_mat[i, 0] = neg_inf
        for j in range(1, tar_len + 1):
            d_mat[0, j] = neg_inf
            p_mat[0, j] = neg_inf
            q_mat[0, j] = -gap_open - gap_ext * (j - 1)

        for i in range(1, src_len + 1):
            for j in range(1, tar_len + 1):
                sim_val = self._sim_func(src[i - 1], tar[j - 1])
                # Pair src[i-1] with tar[j-1], coming from any prior state.
                d_mat[i, j] = max(
                    d_mat[i - 1, j - 1] + sim_val,
                    p_mat[i - 1, j - 1] + sim_val,
                    q_mat[i - 1, j - 1] + sim_val,
                )

                # Open a new gap after a pair, or extend the current one.
                p_mat[i, j] = max(
                    d_mat[i - 1, j] - gap_open, p_mat[i - 1, j] - gap_ext
                )

                q_mat[i, j] = max(
                    d_mat[i, j - 1] - gap_open, q_mat[i, j - 1] - gap_ext
                )

        # The bottom-right cell of each matrix covers both full strings.
        return max(d_mat[-1, -1], p_mat[-1, -1], q_mat[-1, -1])

    def sim(self, src, tar):
        """Return the normalized Gotoh score of two strings.

        The raw score, floored at 0, is divided by the geometric mean of the
        two strings' self-alignment scores.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Normalized Gotoh score; 1.0 for equal strings, and 0.0 when the
            normalizing term is zero (e.g. when one string is empty)

        Examples
        --------
        >>> cmp = Gotoh()
        >>> cmp.sim('cat', 'hat')
        0.6666666666666667
        >>> cmp.sim('Niall', 'Neil')
        0.22360679774997896
        >>> round(cmp.sim('aluminum', 'Catalan'), 12)
        0.0
        >>> cmp.sim('cat', 'hat')
        0.6666666666666667


        .. versionadded:: 0.4.1

        """
        if src == tar:
            return 1.0

        norm = (
            self.sim_score(src, src) ** 0.5 * self.sim_score(tar, tar) ** 0.5
        )
        # A string whose self-alignment score is 0 (such as the empty string)
        # would otherwise cause a division by zero.
        if not norm:
            return 0.0
        return max(0.0, self.sim_score(src, tar)) / norm
189
190
191 1
@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the Gotoh.sim_score method instead.',
)
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
    """Return the Gotoh score of two strings.

    This is a wrapper for :py:meth:`Gotoh.sim_score`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison
    gap_open : float
        The cost of an open alignment gap (1 by default)
    gap_ext : float
        The cost of an alignment gap extension (0.4 by default)
    sim_func : function
        A function that returns the similarity of two characters (identity
        similarity by default)

    Returns
    -------
    float
        Gotoh score

    Examples
    --------
    >>> gotoh('cat', 'hat')
    2.0
    >>> gotoh('Niall', 'Neil')
    1.0
    >>> round(gotoh('aluminum', 'Catalan'), 12)
    -0.4
    >>> gotoh('cat', 'hat')
    2.0

    .. versionadded:: 0.1.0

    """
    # Build a one-off scorer with the requested penalties and delegate.
    return Gotoh(gap_open, gap_ext, sim_func).sim_score(src, tar)
236
237
238
if __name__ == '__main__':
    # Run this module's docstring examples when executed as a script.
    from doctest import testmod

    testmod()
242