Test Failed
Push — master ( 23810f...afe14d )
by Chris
09:47
created

abydos/distance/_smith_waterman.py (1 issue)

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.distance._smith_waterman.
20
21
Smith-Waterman score
22
"""
23
24 1
from __future__ import (
25
    absolute_import,
26
    division,
27
    print_function,
28
    unicode_literals,
29
)
30
31 1
from numpy import float32 as np_float32
32 1
from numpy import zeros as np_zeros
33
34 1
from six.moves import range
35
36 1
from ._ident import sim_ident
37 1
from ._needleman_wunsch import NeedlemanWunsch
38
39 1
__all__ = ['SmithWaterman', 'smith_waterman']
40
41
42 1
class SmithWaterman(NeedlemanWunsch):
43
    """Smith-Waterman score.
44
45
    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
46
    measure, differing from Needleman-Wunsch in that it focuses on local
47
    alignment and disallows negative scores.
48
    """
49
50 1 View Code Duplication
    def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident):
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
51
        """Return the Smith-Waterman score of two strings.
52
53
        Parameters
54
        ----------
55
        src : str
56
            Source string for comparison
57
        tar : str
58
            Target string for comparison
59
        gap_cost : float
60
            The cost of an alignment gap (1 by default)
61
        sim_func : function
62
            A function that returns the similarity of two characters (identity
63
            similarity by default)
64
65
        Returns
66
        -------
67
        float
68
            Smith-Waterman score
69
70
        Examples
71
        --------
72
        >>> cmp = SmithWaterman()
73
        >>> cmp.dist_abs('cat', 'hat')
74
        2.0
75
        >>> cmp.dist_abs('Niall', 'Neil')
76
        1.0
77
        >>> cmp.dist_abs('aluminum', 'Catalan')
78
        0.0
79
        >>> cmp.dist_abs('ATCG', 'TAGC')
80
        1.0
81
82
        """
83 1
        d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
84
85 1
        for i in range(len(src) + 1):
86 1
            d_mat[i, 0] = 0
87 1
        for j in range(len(tar) + 1):
88 1
            d_mat[0, j] = 0
89 1
        for i in range(1, len(src) + 1):
90 1
            for j in range(1, len(tar) + 1):
91 1
                match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1])
92 1
                delete = d_mat[i - 1, j] - gap_cost
93 1
                insert = d_mat[i, j - 1] - gap_cost
94 1
                d_mat[i, j] = max(0, match, delete, insert)
95 1
        return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1]
96
97
98 1
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
99
    """Return the Smith-Waterman score of two strings.
100
101
    This is a wrapper for :py:meth:`SmithWaterman.dist_abs`.
102
103
    Parameters
104
    ----------
105
    src : str
106
        Source string for comparison
107
    tar : str
108
        Target string for comparison
109
    gap_cost : float
110
        The cost of an alignment gap (1 by default)
111
    sim_func : function
112
        A function that returns the similarity of two characters (identity
113
        similarity by default)
114
115
    Returns
116
    -------
117
    float
118
        Smith-Waterman score
119
120
    Examples
121
    --------
122
    >>> smith_waterman('cat', 'hat')
123
    2.0
124
    >>> smith_waterman('Niall', 'Neil')
125
    1.0
126
    >>> smith_waterman('aluminum', 'Catalan')
127
    0.0
128
    >>> smith_waterman('ATCG', 'TAGC')
129
    1.0
130
131
    """
132 1
    return SmithWaterman().dist_abs(src, tar, gap_cost, sim_func)
133
134
135
if __name__ == '__main__':
136
    import doctest
137
138
    doctest.testmod()
139