abydos.distance._gotoh.Gotoh.sim()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 35
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 35
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 2
nop 3
crap 2
1
# Copyright 2014-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.distance._gotoh.
18
19 1
Gotoh score
20
"""
21
from typing import Any, Callable, Optional, cast
22
23
from numpy import float_ as np_float
24 1
from numpy import zeros as np_zeros
25
26
from ._needleman_wunsch import NeedlemanWunsch
27
28
__all__ = ['Gotoh']
29
30
31 1
class Gotoh(NeedlemanWunsch):
32
    """Gotoh score.
33 1
34 1
    The Gotoh score :cite:`Gotoh:1982` is essentially Needleman-Wunsch with
35
    affine gap penalties.
36 1
37
    .. versionadded:: 0.3.6
38 1
    """
39 1
40 1
    def __init__(
41
        self,
42 1
        gap_open: float = 1,
43
        gap_ext: float = 0.4,
44
        sim_func: Optional[Callable[[str, str], float]] = None,
45 1
        **kwargs: Any
46
    ) -> None:
47
        """Initialize Gotoh instance.
48
49
        Parameters
50
        ----------
51
        gap_open : float
52
            The cost of an open alignment gap (1 by default)
53
        gap_ext : float
54 1
            The cost of an alignment gap extension (0.4 by default)
55
        sim_func : function
56
            A function that returns the similarity of two characters (identity
57
            similarity by default)
58
        **kwargs
59
            Arbitrary keyword arguments
60
61
62
        .. versionadded:: 0.4.0
63
64
        """
65
        super(Gotoh, self).__init__(**kwargs)
66
        self._gap_open = gap_open
67
        self._gap_ext = gap_ext
68
        self._sim_func = cast(
69
            Callable[[str, str], float],
70
            NeedlemanWunsch.sim_matrix if sim_func is None else sim_func,
71
        )  # type: Callable[[str, str], float]
72
73 1
    def sim_score(self, src: str, tar: str) -> float:
74 1
        """Return the Gotoh score of two strings.
75 1
76 1
        Parameters
77 1
        ----------
78 1
        src : str
79
            Source string for comparison
80 1
        tar : str
81
            Target string for comparison
82
83
        Returns
84
        -------
85
        float
86
            Gotoh score
87
88
        Examples
89
        --------
90
        >>> cmp = Gotoh()
91
        >>> cmp.sim_score('cat', 'hat')
92
        2.0
93
        >>> cmp.sim_score('Niall', 'Neil')
94
        1.0
95
        >>> round(cmp.sim_score('aluminum', 'Catalan'), 12)
96
        -0.4
97
        >>> cmp.sim_score('cat', 'hat')
98
        2.0
99
100
101
        .. versionadded:: 0.1.0
102
        .. versionchanged:: 0.3.6
103
            Encapsulated in class
104
105
        """
106
        d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
107
        p_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
108
        q_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
109
110
        d_mat[0, 0] = 0
111
        p_mat[0, 0] = float('-inf')
112
        q_mat[0, 0] = float('-inf')
113 1
        for i in range(1, len(src) + 1):
114 1
            d_mat[i, 0] = float('-inf')
115 1
            p_mat[i, 0] = -self._gap_open - self._gap_ext * (i - 1)
116
            q_mat[i, 0] = float('-inf')
117 1
            if len(tar) > 1:
118 1
                q_mat[i, 1] = -self._gap_open
119 1
        for j in range(1, len(tar) + 1):
120 1
            d_mat[0, j] = float('-inf')
121 1
            p_mat[0, j] = float('-inf')
122 1
            if len(src) > 1:
123 1
                p_mat[1, j] = -self._gap_open
124 1
            q_mat[0, j] = -self._gap_open - self._gap_ext * (j - 1)
125 1
126 1
        for i in range(1, len(src) + 1):
127 1
            for j in range(1, len(tar) + 1):
128 1
                sim_val = self._sim_func(src[i - 1], tar[j - 1])
129 1
                d_mat[i, j] = max(
130 1
                    d_mat[i - 1, j - 1] + sim_val,
131 1
                    p_mat[i - 1, j - 1] + sim_val,
132
                    q_mat[i - 1, j - 1] + sim_val,
133 1
                )
134 1
135 1
                p_mat[i, j] = max(
136 1
                    d_mat[i - 1, j] - self._gap_open,
137
                    p_mat[i - 1, j] - self._gap_ext,
138
                )
139
140
                q_mat[i, j] = max(
141
                    d_mat[i, j - 1] - self._gap_open,
142 1
                    q_mat[i, j - 1] - self._gap_ext,
143
                )
144
145
        i, j = (n - 1 for n in d_mat.shape)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable n does not seem to be defined.
Loading history...
146
        return cast(float, max(d_mat[i, j], p_mat[i, j], q_mat[i, j]))
147 1
148
    def sim(self, src: str, tar: str) -> float:
149
        """Return the normalized Gotoh score of two strings.
150
151
        Parameters
152 1
        ----------
153 1
        src : str
154
            Source string for comparison
155 1
        tar : str
156
            Target string for comparison
157
158
        Returns
159
        -------
160
        float
161
            Normalized Gotoh score
162
163
        Examples
164
        --------
165
        >>> cmp = Gotoh()
166
        >>> cmp.sim('cat', 'hat')
167
        0.6666666666666667
168
        >>> cmp.sim('Niall', 'Neil')
169
        0.22360679774997896
170
        >>> round(cmp.sim('aluminum', 'Catalan'), 12)
171
        0.0
172
        >>> cmp.sim('cat', 'hat')
173
        0.6666666666666667
174
175
176
        .. versionadded:: 0.4.1
177
178
        """
179
        if src == tar:
180
            return 1.0
181
        return max(0.0, self.sim_score(src, tar)) / (
182
            self.sim_score(src, src) ** 0.5 * self.sim_score(tar, tar) ** 0.5
183
        )
184
185
186 1
if __name__ == '__main__':
187 1
    import doctest
188 1
189
    doctest.testmod()
190