abydos.distance._gotoh - Code Metrics - Inspection of "0.4.1" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#225)

by Chris

created 2019-07-12 00:08 UTC

abydos.distance._gotoh A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	242
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes

Metric	Value
wmc	10
eloc	69
dl	0
loc	242
ccs	50
cts	50
cp	1
rs	10
c	0
b	0
f	0

3 Methods

Rating	Name	Size	Complexity
A	Gotoh.__init__()	25	2
B	Gotoh.sim_score()	72	5
A	Gotoh.sim()	35	2

1 Function

Rating	Name	Duplication	Size	Complexity
A	gotoh()	0	45	1

# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._gotoh.

Gotoh score
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

from deprecation import deprecated

from numpy import float32 as np_float32
from numpy import zeros as np_zeros

from six.moves import range

from ._ident import sim_ident
from ._needleman_wunsch import NeedlemanWunsch
from .. import __version__

__all__ = ['Gotoh', 'gotoh']


class Gotoh(NeedlemanWunsch):
    """Gotoh score.

    The Gotoh score :cite:`Gotoh:1982` is essentially Needleman-Wunsch with
    affine gap penalties.

    .. versionadded:: 0.3.6
    """

    def __init__(self, gap_open=1, gap_ext=0.4, sim_func=None, **kwargs):
        """Initialize Gotoh instance.

        Parameters
        ----------
        gap_open : float
            The cost of an open alignment gap (1 by default)
        gap_ext : float
            The cost of an alignment gap extension (0.4 by default)
        sim_func : function
            A function that returns the similarity of two characters (identity
            similarity by default; when None is passed,
            ``NeedlemanWunsch.sim_matrix`` is used)
        **kwargs
            Arbitrary keyword arguments, forwarded to the parent initializer


        .. versionadded:: 0.4.0

        """
        super(Gotoh, self).__init__(**kwargs)
        self._gap_open = gap_open
        self._gap_ext = gap_ext
        self._sim_func = sim_func
        if self._sim_func is None:
            self._sim_func = NeedlemanWunsch.sim_matrix

    def sim_score(self, src, tar):
        """Return the Gotoh score of two strings.

        Three matrices are filled by dynamic programming: ``d_mat`` holds the
        best score of alignments ending in a character pairing, ``p_mat``
        those ending in a gap that consumes a character of ``src``, and
        ``q_mat`` those ending in a gap that consumes a character of ``tar``.
        The result is the best of the three in the final cell.

        If exactly one of the strings is empty, the score is the cost of a
        single gap spanning the other string:
        ``-gap_open - gap_ext * (length - 1)``.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Gotoh score

        Examples
        --------
        >>> cmp = Gotoh()
        >>> cmp.sim_score('cat', 'hat')
        2.0
        >>> cmp.sim_score('Niall', 'Neil')
        1.0
        >>> round(cmp.sim_score('aluminum', 'Catalan'), 12)
        -0.4
        >>> cmp.sim_score('cat', 'hat')
        2.0


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        src_len = len(src)
        tar_len = len(tar)
        neg_inf = float('-inf')
        gap_open = self._gap_open
        gap_ext = self._gap_ext

        shape = (src_len + 1, tar_len + 1)
        d_mat = np_zeros(shape, dtype=np_float32)
        p_mat = np_zeros(shape, dtype=np_float32)
        q_mat = np_zeros(shape, dtype=np_float32)

        # Border initialization. Only row 0 and column 0 are seeded here:
        # every cell with i >= 1 and j >= 1 is written by the main loop
        # before it is read, so seeding q_mat[i, 1] or p_mat[1, j] is
        # unnecessary -- and indexing them raises IndexError when the other
        # string is empty (the matrix then has a single column or row).
        d_mat[0, 0] = 0
        p_mat[0, 0] = neg_inf
        q_mat[0, 0] = neg_inf
        for i in range(1, src_len + 1):
            d_mat[i, 0] = neg_inf
            p_mat[i, 0] = -gap_open - gap_ext * (i - 1)
            q_mat[i, 0] = neg_inf
        for j in range(1, tar_len + 1):
            d_mat[0, j] = neg_inf
            p_mat[0, j] = neg_inf
            q_mat[0, j] = -gap_open - gap_ext * (j - 1)

        for i in range(1, src_len + 1):
            for j in range(1, tar_len + 1):
                sim_val = self._sim_func(src[i - 1], tar[j - 1])

                # Pair src[i-1] with tar[j-1], continuing from any state.
                d_mat[i, j] = max(
                    d_mat[i - 1, j - 1] + sim_val,
                    p_mat[i - 1, j - 1] + sim_val,
                    q_mat[i - 1, j - 1] + sim_val,
                )

                # Gap consuming src[i-1]: open a new gap or extend one.
                p_mat[i, j] = max(
                    d_mat[i - 1, j] - gap_open, p_mat[i - 1, j] - gap_ext
                )

                # Gap consuming tar[j-1]: open a new gap or extend one.
                q_mat[i, j] = max(
                    d_mat[i, j - 1] - gap_open, q_mat[i, j - 1] - gap_ext
                )

        return max(
            d_mat[src_len, tar_len],
            p_mat[src_len, tar_len],
            q_mat[src_len, tar_len],
        )

    def sim(self, src, tar):
        """Return the normalized Gotoh score of two strings.

        The raw score is clamped at 0 and divided by the geometric mean of
        the two strings' self-scores. Identical strings always score 1.0.
        If either self-score is not positive (for instance when one string
        is empty), the normalization is undefined and 0.0 is returned
        instead of dividing by zero.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Normalized Gotoh score

        Examples
        --------
        >>> cmp = Gotoh()
        >>> cmp.sim('cat', 'hat')
        0.6666666666666667
        >>> cmp.sim('Niall', 'Neil')
        0.22360679774997896
        >>> round(cmp.sim('aluminum', 'Catalan'), 12)
        0.0
        >>> cmp.sim('cat', 'hat')
        0.6666666666666667


        .. versionadded:: 0.4.1

        """
        if src == tar:
            return 1.0

        score = max(0.0, self.sim_score(src, tar))
        src_self = self.sim_score(src, src)
        tar_self = self.sim_score(tar, tar)

        # A zero or negative self-score would make the denominator zero or
        # non-real; treat such pairs as entirely dissimilar.
        if src_self <= 0 or tar_self <= 0:
            return 0.0

        return score / (src_self ** 0.5 * tar_self ** 0.5)


@deprecated(
    deprecated_in='0.4.0',
    removed_in='0.6.0',
    current_version=__version__,
    details='Use the Gotoh.sim_score method instead.',
)
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
    """Return the Gotoh score of two strings.

    This is a wrapper for :py:meth:`Gotoh.sim_score`: it builds a
    :py:class:`Gotoh` instance from the gap and similarity arguments and
    returns its score for ``src`` and ``tar``.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison
    gap_open : float
        The cost of an open alignment gap (1 by default)
    gap_ext : float
        The cost of an alignment gap extension (0.4 by default)
    sim_func : function
        A function that returns the similarity of two characters (identity
        similarity by default)

    Returns
    -------
    float
        Gotoh score

    Examples
    --------
    >>> gotoh('cat', 'hat')
    2.0
    >>> gotoh('Niall', 'Neil')
    1.0
    >>> round(gotoh('aluminum', 'Catalan'), 12)
    -0.4
    >>> gotoh('cat', 'hat')
    2.0

    .. versionadded:: 0.1.0

    """
    # The deprecation notice and the docstring name sim_score because that
    # is the method this wrapper actually delegates to.
    return Gotoh(gap_open, gap_ext, sim_func).sim_score(src, tar)


if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()


1		# -- coding: utf-8 --
2
3		# Copyright 2014-2018 by Christopher C. Little.
4		# This file is part of Abydos.
5		#
6		# Abydos is free software: you can redistribute it and/or modify
7		# it under the terms of the GNU General Public License as published by
8		# the Free Software Foundation, either version 3 of the License, or
9		# (at your option) any later version.
10		#
11		# Abydos is distributed in the hope that it will be useful,
12		# but WITHOUT ANY WARRANTY; without even the implied warranty of
13		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14		# GNU General Public License for more details.
15		#
16		# You should have received a copy of the GNU General Public License
17		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19	1	"""abydos.distance._gotoh.
20
21		Gotoh score
22		"""
23
24	1	from __future__ import (
25		absolute_import,
26		division,
27		print_function,
28		unicode_literals,
29		)
30
31	1	from deprecation import deprecated
32
33	1	from numpy import float32 as np_float32
34	1	from numpy import zeros as np_zeros
35
36	1	from six.moves import range
37
38	1	from ._ident import sim_ident
39	1	from ._needleman_wunsch import NeedlemanWunsch
40	1	from .. import __version__
41
42	1	__all__ = ['Gotoh', 'gotoh']
43
44
45	1	class Gotoh(NeedlemanWunsch):
46		"""Gotoh score.
47
48		The Gotoh score :cite:`Gotoh:1982` is essentially Needleman-Wunsch with
49		affine gap penalties.
50
51		.. versionadded:: 0.3.6
52		"""
53
54	1	def __init__(self, gap_open=1, gap_ext=0.4, sim_func=None, **kwargs):
55		"""Initialize Gotoh instance.
56
57		Parameters
58		----------
59		gap_open : float
60		The cost of an open alignment gap (1 by default)
61		gap_ext : float
62		The cost of an alignment gap extension (0.4 by default)
63		sim_func : function
64		A function that returns the similarity of two characters (identity
65		similarity by default)
66		**kwargs
67		Arbitrary keyword arguments
68
69
70		.. versionadded:: 0.4.0
71
72		"""
73	1	super(Gotoh, self).__init__(**kwargs)
74	1	self._gap_open = gap_open
75	1	self._gap_ext = gap_ext
76	1	self._sim_func = sim_func
77	1	if self._sim_func is None:
78	1	self._sim_func = NeedlemanWunsch.sim_matrix
79
80	1	def sim_score(self, src, tar):
81		"""Return the Gotoh score of two strings.
82
83		Parameters
84		----------
85		src : str
86		Source string for comparison
87		tar : str
88		Target string for comparison
89
90		Returns
91		-------
92		float
93		Gotoh score
94
95		Examples
96		--------
97		>>> cmp = Gotoh()
98		>>> cmp.sim_score('cat', 'hat')
99		2.0
100		>>> cmp.sim_score('Niall', 'Neil')
101		1.0
102		>>> round(cmp.sim_score('aluminum', 'Catalan'), 12)
103		-0.4
104		>>> cmp.sim_score('cat', 'hat')
105		2.0
106
107
108		.. versionadded:: 0.1.0
109		.. versionchanged:: 0.3.6
110		Encapsulated in class
111
112		"""
113	1	d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
114	1	p_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
115	1	q_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32)
116
117	1	d_mat[0, 0] = 0
118	1	p_mat[0, 0] = float('-inf')
119	1	q_mat[0, 0] = float('-inf')
120	1	for i in range(1, len(src) + 1):
121	1	d_mat[i, 0] = float('-inf')
122	1	p_mat[i, 0] = -self._gap_open - self._gap_ext * (i - 1)
123	1	q_mat[i, 0] = float('-inf')
124	1	q_mat[i, 1] = -self._gap_open
125	1	for j in range(1, len(tar) + 1):
126	1	d_mat[0, j] = float('-inf')
127	1	p_mat[0, j] = float('-inf')
128	1	p_mat[1, j] = -self._gap_open
129	1	q_mat[0, j] = -self._gap_open - self._gap_ext * (j - 1)
130
131	1	for i in range(1, len(src) + 1):
132	1	for j in range(1, len(tar) + 1):
133	1	sim_val = self._sim_func(src[i - 1], tar[j - 1])
134	1	d_mat[i, j] = max(
135		d_mat[i - 1, j - 1] + sim_val,
136		p_mat[i - 1, j - 1] + sim_val,
137		q_mat[i - 1, j - 1] + sim_val,
138		)
139
140	1	p_mat[i, j] = max(
141		d_mat[i - 1, j] - self._gap_open,
142		p_mat[i - 1, j] - self._gap_ext,
143		)
144
145	1	q_mat[i, j] = max(
146		d_mat[i, j - 1] - self._gap_open,
147		q_mat[i, j - 1] - self._gap_ext,
148		)
149
150	1	i, j = (n - 1 for n in d_mat.shape)
		Issue (Comprehensibility, Best Practice; introduced 2018-08-02 19:04 UTC): The variable `n` does not seem to be defined. (False positive: `n` is the loop variable of the generator expression on line 150.)
151	1	return max(d_mat[i, j], p_mat[i, j], q_mat[i, j])
152
153	1	def sim(self, src, tar):
154		"""Return the normalized Gotoh score of two strings.
155
156		Parameters
157		----------
158		src : str
159		Source string for comparison
160		tar : str
161		Target string for comparison
162
163		Returns
164		-------
165		float
166		Normalized Gotoh score
167
168		Examples
169		--------
170		>>> cmp = Gotoh()
171		>>> cmp.sim('cat', 'hat')
172		0.6666666666666667
173		>>> cmp.sim('Niall', 'Neil')
174		0.22360679774997896
175		>>> round(cmp.sim('aluminum', 'Catalan'), 12)
176		0.0
177		>>> cmp.sim('cat', 'hat')
178		0.6666666666666667
179
180
181		.. versionadded:: 0.4.1
182
183		"""
184	1	if src == tar:
185	1	return 1.0
186	1	return max(0.0, self.sim_score(src, tar)) / (
187		self.sim_score(src, src) ** 0.5 * self.sim_score(tar, tar) ** 0.5
188		)
189
190
191	1	@deprecated(
192		deprecated_in='0.4.0',
193		removed_in='0.6.0',
194		current_version=__version__,
195		details='Use the Gotoh.dist_abs method instead.',
196		)
197	1	def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
198		"""Return the Gotoh score of two strings.
199
200		This is a wrapper for :py:meth:`Gotoh.dist_abs`.
201
202		Parameters
203		----------
204		src : str
205		Source string for comparison
206		tar : str
207		Target string for comparison
208		gap_open : float
209		The cost of an open alignment gap (1 by default)
210		gap_ext : float
211		The cost of an alignment gap extension (0.4 by default)
212		sim_func : function
213		A function that returns the similarity of two characters (identity
214		similarity by default)
215
216		Returns
217		-------
218		float
219		Gotoh score
220
221		Examples
222		--------
223		>>> gotoh('cat', 'hat')
224		2.0
225		>>> gotoh('Niall', 'Neil')
226		1.0
227		>>> round(gotoh('aluminum', 'Catalan'), 12)
228		-0.4
229		>>> gotoh('cat', 'hat')
230		2.0
231
232		.. versionadded:: 0.1.0
233
234		"""
235	1	return Gotoh(gap_open, gap_ext, sim_func).sim_score(src, tar)
236
237
238		if __name__ == '__main__':
239		import doctest
240
241		doctest.testmod()
242

chrislit / abydos

Pull Request — master (#225)

abydos.distance._gotoh A

Complexity

Size/Duplication

Test Coverage

Importance

3 Methods

1 Function

Duplication Side-by-Side

Filter issues like