Completed
Pull Request — master (#225)
by Chris
09:15
created

SmithWaterman.dist_abs()   A

Complexity

Conditions 3

Size

Total Lines 44
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 3

Importance

Changes 0
Metric Value
eloc 10
dl 0
loc 44
ccs 9
cts 9
cp 1
rs 9.9
c 0
b 0
f 0
cc 3
nop 3
crap 3
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._smith_waterman.
20
21
Smith-Waterman score
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from deprecation import deprecated
32
33 1
from numpy import float32 as np_float32
34 1
from numpy import zeros as np_zeros
35
36 1
from six.moves import range
37
38 1
from ._ident import sim_ident
39 1
from ._needleman_wunsch import NeedlemanWunsch
40 1
from .. import __version__
41
42 1
__all__ = ['SmithWaterman', 'smith_waterman']
43
44
45 1
class SmithWaterman(NeedlemanWunsch):
46
    """Smith-Waterman score.
47
48
    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
49
    measure, differing from Needleman-Wunsch in that it focuses on local
50
    alignment and disallows negative scores.
51
52
    .. versionadded:: 0.3.6
53
    """
54
55 1
    def __init__(self, gap_cost=1, sim_func=None, **kwargs):
56
        """Initialize SmithWaterman instance.
57
58
        Parameters
59
        ----------
60
        gap_cost : float
61
            The cost of an alignment gap (1 by default)
62
        sim_func : function
63
            A function that returns the similarity of two characters (identity
64
            similarity by default)
65
        **kwargs
66
            Arbitrary keyword arguments
67
68
69
        .. versionadded:: 0.4.0
70
71
        """
72 1
        super(SmithWaterman, self).__init__(**kwargs)
73 1
        self._gap_cost = gap_cost
74 1
        self._sim_func = sim_func
75 1
        if self._sim_func is None:
76 1
            self._sim_func = NeedlemanWunsch.sim_matrix
77
78 1
    def sim_score(self, src, tar):
79
        """Return the Smith-Waterman score of two strings.
80
81
        Parameters
82
        ----------
83
        src : str
84
            Source string for comparison
85
        tar : str
86
            Target string for comparison
87
88
        Returns
89
        -------
90
        float
91
            Smith-Waterman score
92
93
        Examples
94
        --------
95
        >>> cmp = SmithWaterman()
96
        >>> cmp.sim_score('cat', 'hat')
97
        2.0
98
        >>> cmp.sim_score('Niall', 'Neil')
99
        1.0
100
        >>> cmp.sim_score('aluminum', 'Catalan')
101
        0.0
102
        >>> cmp.sim_score('ATCG', 'TAGC')
103
        1.0
104
105
106
        .. versionadded:: 0.1.0
107
        .. versionchanged:: 0.3.6
108
            Encapsulated in class
109
110
        """
111 1
        d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
112
113 1
        for i in range(1, len(src) + 1):
114 1
            for j in range(1, len(tar) + 1):
115 1
                match = d_mat[i - 1, j - 1] + self._sim_func(
116
                    src[i - 1], tar[j - 1]
117
                )
118 1
                delete = d_mat[i - 1, j] - self._gap_cost
119 1
                insert = d_mat[i, j - 1] - self._gap_cost
120 1
                d_mat[i, j] = max(0, match, delete, insert)
121 1
        return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1]
122
123 1
    def sim(self, src, tar):
124
        """Return the normalized Smith-Waterman score of two strings.
125
126
        Parameters
127
        ----------
128
        src : str
129
            Source string for comparison
130
        tar : str
131
            Target string for comparison
132
133
        Returns
134
        -------
135
        float
136
            Normalized Smith-Waterman score
137
138
        Examples
139
        --------
140
        >>> cmp = SmithWaterman()
141
        >>> cmp.sim('cat', 'hat')
142
        0.6666666666666667
143
        >>> cmp.sim('Niall', 'Neil')
144
        0.22360679774997896
145
        >>> round(cmp.sim('aluminum', 'Catalan'), 12)
146
        0.0
147
        >>> cmp.sim('cat', 'hat')
148
        0.6666666666666667
149
150
151
        .. versionadded:: 0.4.1
152
153
        """
154 1
        if src == tar:
155 1
            return 1.0
156 1
        return max(0.0, self.sim_score(src, tar)) / (
157
            self.sim_score(src, src) ** 0.5 * self.sim_score(tar, tar) ** 0.5
158
        )
159
160
161 1
@deprecated(
162
    deprecated_in='0.4.0',
163
    removed_in='0.6.0',
164
    current_version=__version__,
165
    details='Use the SmithWaterman.dist_abs method instead.',
166
)
167 1
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
168
    """Return the Smith-Waterman score of two strings.
169
170
    This is a wrapper for :py:meth:`SmithWaterman.dist_abs`.
171
172
    Parameters
173
    ----------
174
    src : str
175
        Source string for comparison
176
    tar : str
177
        Target string for comparison
178
    gap_cost : float
179
        The cost of an alignment gap (1 by default)
180
    sim_func : function
181
        A function that returns the similarity of two characters (identity
182
        similarity by default)
183
184
    Returns
185
    -------
186
    float
187
        Smith-Waterman score
188
189
    Examples
190
    --------
191
    >>> smith_waterman('cat', 'hat')
192
    2.0
193
    >>> smith_waterman('Niall', 'Neil')
194
    1.0
195
    >>> smith_waterman('aluminum', 'Catalan')
196
    0.0
197
    >>> smith_waterman('ATCG', 'TAGC')
198
    1.0
199
200
    .. versionadded:: 0.1.0
201
202
    """
203 1
    return SmithWaterman(gap_cost, sim_func).sim_score(src, tar)
204
205
206
if __name__ == '__main__':
207
    import doctest
208
209
    doctest.testmod()
210