abydos.distance._smith_waterman.SmithWaterman.dist_abs() - Code Metrics - Inspection of "0.4.1" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#225)

by Chris

created 2019-07-12 00:08 UTC

SmithWaterman.dist_abs() A

↳ Parent: abydos.distance._smith_waterman

Complexity

Conditions

Size

Total Lines	44
Code Lines	10

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	9
CRAP Score	3

Importance

Changes

Metric	Value
eloc	10
dl	0
loc	44
ccs	9
cts	9
cp	1
rs	9.9
c	0
b	0
f	0
cc	3
nop	3
crap	3

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._smith_waterman.

Smith-Waterman score
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from deprecation import deprecated

from numpy import float32 as np_float32
from numpy import zeros as np_zeros

from six.moves import range

from ._ident import sim_ident
from ._needleman_wunsch import NeedlemanWunsch
from .. import __version__

__all__ = ['SmithWaterman', 'smith_waterman']


class SmithWaterman(NeedlemanWunsch):
    """Smith-Waterman score.

    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
    measure, differing from Needleman-Wunsch in that it focuses on local
    alignment and disallows negative scores.

    .. versionadded:: 0.3.6
    """

    def __init__(self, gap_cost=1, sim_func=None, **kwargs):
        """Initialize SmithWaterman instance.

        Parameters
        ----------
        gap_cost : float
            The cost of an alignment gap (1 by default)
        sim_func : function
            A function that returns the similarity of two characters (identity
            similarity by default)
        **kwargs
            Arbitrary keyword arguments


        .. versionadded:: 0.4.0

        """
        super(SmithWaterman, self).__init__(**kwargs)
        self._gap_cost = gap_cost
        self._sim_func = sim_func
        if self._sim_func is None:
            self._sim_func = NeedlemanWunsch.sim_matrix

    def sim_score(self, src, tar):
        """Return the Smith-Waterman score of two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Smith-Waterman score

        Examples
        --------
        >>> cmp = SmithWaterman()
        >>> cmp.sim_score('cat', 'hat')
        2.0
        >>> cmp.sim_score('Niall', 'Neil')
        1.0
        >>> cmp.sim_score('aluminum', 'Catalan')
        0.0
        >>> cmp.sim_score('ATCG', 'TAGC')
        1.0


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)

        for i in range(1, len(src) + 1):
            for j in range(1, len(tar) + 1):
                match = d_mat[i - 1, j - 1] + self._sim_func(
                    src[i - 1], tar[j - 1]
                )
                delete = d_mat[i - 1, j] - self._gap_cost
                insert = d_mat[i, j - 1] - self._gap_cost
                d_mat[i, j] = max(0, match, delete, insert)
        return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1]

    def sim(self, src, tar):
        """Return the normalized Smith-Waterman score of two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Normalized Smith-Waterman score

        Examples
        --------
        >>> cmp = SmithWaterman()
        >>> cmp.sim('cat', 'hat')
        0.6666666666666667
        >>> cmp.sim('Niall', 'Neil')
        0.22360679774997896
        >>> round(cmp.sim('aluminum', 'Catalan'), 12)
        0.0
        >>> cmp.sim('cat', 'hat')
        0.6666666666666667


        .. versionadded:: 0.4.1

        """
        if src == tar:
            return 1.0
        return max(0.0, self.sim_score(src, tar)) / (
            self.sim_score(src, src) ** 0.5 * self.sim_score(tar, tar) ** 0.5
        )


@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the SmithWaterman.dist_abs method instead.',
)
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Smith-Waterman score of two strings.

    This is a wrapper for :py:meth:`SmithWaterman.dist_abs`.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison
    gap_cost : float
        The cost of an alignment gap (1 by default)
    sim_func : function
        A function that returns the similarity of two characters (identity
        similarity by default)

    Returns
    -------
    float
        Smith-Waterman score

    Examples
    --------
    >>> smith_waterman('cat', 'hat')
    2.0
    >>> smith_waterman('Niall', 'Neil')
    1.0
    >>> smith_waterman('aluminum', 'Catalan')
    0.0
    >>> smith_waterman('ATCG', 'TAGC')
    1.0

    .. versionadded:: 0.1.0

    """
    return SmithWaterman(gap_cost, sim_func).sim_score(src, tar)


if __name__ == '__main__':
    import doctest

    doctest.testmod()


1		# -- coding: utf-8 --
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.distance._smith_waterman.
20
21		Smith-Waterman score
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from deprecation import deprecated
32
33	1	from numpy import float32 as np_float32
34	1	from numpy import zeros as np_zeros
35
36	1	from six.moves import range
37
38	1	from ._ident import sim_ident
39	1	from ._needleman_wunsch import NeedlemanWunsch
40	1	from .. import __version__
41
42	1	__all__ = ['SmithWaterman', 'smith_waterman']
43
44
45	1	class SmithWaterman(NeedlemanWunsch):
46		"""Smith-Waterman score.
47
48		The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
49		measure, differing from Needleman-Wunsch in that it focuses on local
50		alignment and disallows negative scores.
51
52		.. versionadded:: 0.3.6
53		"""
54
55	1	def __init__(self, gap_cost=1, sim_func=None, **kwargs):
56		"""Initialize SmithWaterman instance.
57
58		Parameters
59		----------
60		gap_cost : float
61		The cost of an alignment gap (1 by default)
62		sim_func : function
63		A function that returns the similarity of two characters (identity
64		similarity by default)
65		**kwargs
66		Arbitrary keyword arguments
67
68
69		.. versionadded:: 0.4.0
70
71		"""
72	1	super(SmithWaterman, self).__init__(**kwargs)
73	1	self._gap_cost = gap_cost
74	1	self._sim_func = sim_func
75	1	if self._sim_func is None:
76	1	self._sim_func = NeedlemanWunsch.sim_matrix
77
78	1	def sim_score(self, src, tar):
79		"""Return the Smith-Waterman score of two strings.
80
81		Parameters
82		----------
83		src : str
84		Source string for comparison
85		tar : str
86		Target string for comparison
87
88		Returns
89		-------
90		float
91		Smith-Waterman score
92
93		Examples
94		--------
95		>>> cmp = SmithWaterman()
96		>>> cmp.sim_score('cat', 'hat')
97		2.0
98		>>> cmp.sim_score('Niall', 'Neil')
99		1.0
100		>>> cmp.sim_score('aluminum', 'Catalan')
101		0.0
102		>>> cmp.sim_score('ATCG', 'TAGC')
103		1.0
104
105
106		.. versionadded:: 0.1.0
107		.. versionchanged:: 0.3.6
108		Encapsulated in class
109
110		"""
111	1	d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
112
113	1	for i in range(1, len(src) + 1):
114	1	for j in range(1, len(tar) + 1):
115	1	match = d_mat[i - 1, j - 1] + self._sim_func(
116		src[i - 1], tar[j - 1]
117		)
118	1	delete = d_mat[i - 1, j] - self._gap_cost
119	1	insert = d_mat[i, j - 1] - self._gap_cost
120	1	d_mat[i, j] = max(0, match, delete, insert)
121	1	return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1]
122
123	1	def sim(self, src, tar):
124		"""Return the normalized Smith-Waterman score of two strings.
125
126		Parameters
127		----------
128		src : str
129		Source string for comparison
130		tar : str
131		Target string for comparison
132
133		Returns
134		-------
135		float
136		Normalized Smith-Waterman score
137
138		Examples
139		--------
140		>>> cmp = SmithWaterman()
141		>>> cmp.sim('cat', 'hat')
142		0.6666666666666667
143		>>> cmp.sim('Niall', 'Neil')
144		0.22360679774997896
145		>>> round(cmp.sim('aluminum', 'Catalan'), 12)
146		0.0
147		>>> cmp.sim('cat', 'hat')
148		0.6666666666666667
149
150
151		.. versionadded:: 0.4.1
152
153		"""
154	1	if src == tar:
155	1	return 1.0
156	1	return max(0.0, self.sim_score(src, tar)) / (
157		self.sim_score(src, src) ** 0.5 * self.sim_score(tar, tar) ** 0.5
158		)
159
160
161	1	@deprecated(
162		deprecated_in='0.4.0',
163		removed_in='0.6.0',
164		current_version=__version__,
165		details='Use the SmithWaterman.dist_abs method instead.',
166		)
167	1	def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
168		"""Return the Smith-Waterman score of two strings.
169
170		This is a wrapper for :py:meth:`SmithWaterman.dist_abs`.
171
172		Parameters
173		----------
174		src : str
175		Source string for comparison
176		tar : str
177		Target string for comparison
178		gap_cost : float
179		The cost of an alignment gap (1 by default)
180		sim_func : function
181		A function that returns the similarity of two characters (identity
182		similarity by default)
183
184		Returns
185		-------
186		float
187		Smith-Waterman score
188
189		Examples
190		--------
191		>>> smith_waterman('cat', 'hat')
192		2.0
193		>>> smith_waterman('Niall', 'Neil')
194		1.0
195		>>> smith_waterman('aluminum', 'Catalan')
196		0.0
197		>>> smith_waterman('ATCG', 'TAGC')
198		1.0
199
200		.. versionadded:: 0.1.0
201
202		"""
203	1	return SmithWaterman(gap_cost, sim_func).sim_score(src, tar)
204
205
206		if __name__ == '__main__':
207		import doctest
208
209		doctest.testmod()
210

chrislit / abydos

Pull Request — master (#225)

SmithWaterman.dist_abs() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like