Issues in _gotoh.py (master) - Issues in master - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Issues (140)

abydos/distance/_gotoh.py (1 issue)

Labels

Severity

Minor 1

# Copyright 2014-2020 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.

"""abydos.distance._gotoh.

Gotoh score
"""
from typing import Any, Callable, Optional, cast

from numpy import float_ as np_float
from numpy import zeros as np_zeros

from ._needleman_wunsch import NeedlemanWunsch

__all__ = ['Gotoh']


class Gotoh(NeedlemanWunsch):
    """Gotoh score.

    The Gotoh score :cite:`Gotoh:1982` is essentially Needleman-Wunsch with
    affine gap penalties.

    .. versionadded:: 0.3.6
    """

    def __init__(
        self,
        gap_open: float = 1,
        gap_ext: float = 0.4,
        sim_func: Optional[Callable[[str, str], float]] = None,
        **kwargs: Any
    ) -> None:
        """Initialize Gotoh instance.

        Parameters
        ----------
        gap_open : float
            The cost of an open alignment gap (1 by default)
        gap_ext : float
            The cost of an alignment gap extension (0.4 by default)
        sim_func : function
            A function that returns the similarity of two characters (identity
            similarity by default)
        **kwargs
            Arbitrary keyword arguments


        .. versionadded:: 0.4.0

        """
        super(Gotoh, self).__init__(**kwargs)
        self._gap_open = gap_open
        self._gap_ext = gap_ext
        self._sim_func = cast(
            Callable[[str, str], float],
            NeedlemanWunsch.sim_matrix if sim_func is None else sim_func,
        )  # type: Callable[[str, str], float]

    def sim_score(self, src: str, tar: str) -> float:
        """Return the Gotoh score of two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Gotoh score

        Examples
        --------
        >>> cmp = Gotoh()
        >>> cmp.sim_score('cat', 'hat')
        2.0
        >>> cmp.sim_score('Niall', 'Neil')
        1.0
        >>> round(cmp.sim_score('aluminum', 'Catalan'), 12)
        -0.4
        >>> cmp.sim_score('cat', 'hat')
        2.0


        .. versionadded:: 0.1.0
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
        p_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
        q_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)

        d_mat[0, 0] = 0
        p_mat[0, 0] = float('-inf')
        q_mat[0, 0] = float('-inf')
        for i in range(1, len(src) + 1):
            d_mat[i, 0] = float('-inf')
            p_mat[i, 0] = -self._gap_open - self._gap_ext * (i - 1)
            q_mat[i, 0] = float('-inf')
            if len(tar) > 1:
                q_mat[i, 1] = -self._gap_open
        for j in range(1, len(tar) + 1):
            d_mat[0, j] = float('-inf')
            p_mat[0, j] = float('-inf')
            if len(src) > 1:
                p_mat[1, j] = -self._gap_open
            q_mat[0, j] = -self._gap_open - self._gap_ext * (j - 1)

        for i in range(1, len(src) + 1):
            for j in range(1, len(tar) + 1):
                sim_val = self._sim_func(src[i - 1], tar[j - 1])
                d_mat[i, j] = max(
                    d_mat[i - 1, j - 1] + sim_val,
                    p_mat[i - 1, j - 1] + sim_val,
                    q_mat[i - 1, j - 1] + sim_val,
                )

                p_mat[i, j] = max(
                    d_mat[i - 1, j] - self._gap_open,
                    p_mat[i - 1, j] - self._gap_ext,
                )

                q_mat[i, j] = max(
                    d_mat[i, j - 1] - self._gap_open,
                    q_mat[i, j - 1] - self._gap_ext,
                )

        i, j = (n - 1 for n in d_mat.shape)

        return cast(float, max(d_mat[i, j], p_mat[i, j], q_mat[i, j]))

    def sim(self, src: str, tar: str) -> float:
        """Return the normalized Gotoh score of two strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Normalized Gotoh score

        Examples
        --------
        >>> cmp = Gotoh()
        >>> cmp.sim('cat', 'hat')
        0.6666666666666667
        >>> cmp.sim('Niall', 'Neil')
        0.22360679774997896
        >>> round(cmp.sim('aluminum', 'Catalan'), 12)
        0.0
        >>> cmp.sim('cat', 'hat')
        0.6666666666666667


        .. versionadded:: 0.4.1

        """
        if src == tar:
            return 1.0
        return max(0.0, self.sim_score(src, tar)) / (
            self.sim_score(src, src) ** 0.5 * self.sim_score(tar, tar) ** 0.5
        )


if __name__ == '__main__':
    import doctest

    doctest.testmod()


1		# Copyright 2014-2020 by Christopher C. Little.
2		# This file is part of Abydos.
3		#
4		# Abydos is free software: you can redistribute it and/or modify
5		# it under the terms of the GNU General Public License as published by
6		# the Free Software Foundation, either version 3 of the License, or
7		# (at your option) any later version.
8		#
9		# Abydos is distributed in the hope that it will be useful,
10		# but WITHOUT ANY WARRANTY; without even the implied warranty of
11		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12		# GNU General Public License for more details.
13		#
14		# You should have received a copy of the GNU General Public License
15		# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17		"""abydos.distance._gotoh.
18
19	1	Gotoh score
20		"""
21		from typing import Any, Callable, Optional, cast
22
23		from numpy import float_ as np_float
24	1	from numpy import zeros as np_zeros
25
26		from ._needleman_wunsch import NeedlemanWunsch
27
28		__all__ = ['Gotoh']
29
30
31	1	class Gotoh(NeedlemanWunsch):
32		"""Gotoh score.
33	1
34	1	The Gotoh score :cite:`Gotoh:1982` is essentially Needleman-Wunsch with
35		affine gap penalties.
36	1
37		.. versionadded:: 0.3.6
38	1	"""
39	1
40	1	def __init__(
41		self,
42	1	gap_open: float = 1,
43		gap_ext: float = 0.4,
44		sim_func: Optional[Callable[[str, str], float]] = None,
45	1	**kwargs: Any
46		) -> None:
47		"""Initialize Gotoh instance.
48
49		Parameters
50		----------
51		gap_open : float
52		The cost of an open alignment gap (1 by default)
53		gap_ext : float
54	1	The cost of an alignment gap extension (0.4 by default)
55		sim_func : function
56		A function that returns the similarity of two characters (identity
57		similarity by default)
58		**kwargs
59		Arbitrary keyword arguments
60
61
62		.. versionadded:: 0.4.0
63
64		"""
65		super(Gotoh, self).__init__(**kwargs)
66		self._gap_open = gap_open
67		self._gap_ext = gap_ext
68		self._sim_func = cast(
69		Callable[[str, str], float],
70		NeedlemanWunsch.sim_matrix if sim_func is None else sim_func,
71		) # type: Callable[[str, str], float]
72
73	1	def sim_score(self, src: str, tar: str) -> float:
74	1	"""Return the Gotoh score of two strings.
75	1
76	1	Parameters
77	1	----------
78	1	src : str
79		Source string for comparison
80	1	tar : str
81		Target string for comparison
82
83		Returns
84		-------
85		float
86		Gotoh score
87
88		Examples
89		--------
90		>>> cmp = Gotoh()
91		>>> cmp.sim_score('cat', 'hat')
92		2.0
93		>>> cmp.sim_score('Niall', 'Neil')
94		1.0
95		>>> round(cmp.sim_score('aluminum', 'Catalan'), 12)
96		-0.4
97		>>> cmp.sim_score('cat', 'hat')
98		2.0
99
100
101		.. versionadded:: 0.1.0
102		.. versionchanged:: 0.3.6
103		Encapsulated in class
104
105		"""
106		d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
107		p_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
108		q_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float)
109
110		d_mat[0, 0] = 0
111		p_mat[0, 0] = float('-inf')
112		q_mat[0, 0] = float('-inf')
113	1	for i in range(1, len(src) + 1):
114	1	d_mat[i, 0] = float('-inf')
115	1	p_mat[i, 0] = -self._gap_open - self._gap_ext * (i - 1)
116		q_mat[i, 0] = float('-inf')
117	1	if len(tar) > 1:
118	1	q_mat[i, 1] = -self._gap_open
119	1	for j in range(1, len(tar) + 1):
120	1	d_mat[0, j] = float('-inf')
121	1	p_mat[0, j] = float('-inf')
122	1	if len(src) > 1:
123	1	p_mat[1, j] = -self._gap_open
124	1	q_mat[0, j] = -self._gap_open - self._gap_ext * (j - 1)
125	1
126	1	for i in range(1, len(src) + 1):
127	1	for j in range(1, len(tar) + 1):
128	1	sim_val = self._sim_func(src[i - 1], tar[j - 1])
129	1	d_mat[i, j] = max(
130	1	d_mat[i - 1, j - 1] + sim_val,
131	1	p_mat[i - 1, j - 1] + sim_val,
132		q_mat[i - 1, j - 1] + sim_val,
133	1	)
134	1
135	1	p_mat[i, j] = max(
136	1	d_mat[i - 1, j] - self._gap_open,
137		p_mat[i - 1, j] - self._gap_ext,
138		)
139
140		q_mat[i, j] = max(
141		d_mat[i, j - 1] - self._gap_open,
142	1	q_mat[i, j - 1] - self._gap_ext,
143		)
144
145		i, j = (n - 1 for n in d_mat.shape)
		0 ignored issues – show Comprehensibility Best Practice introduced 2018-08-02 19:04 UTC by Report Bug Copy Issue Report Show Similar Issues like this The variable `n` does not seem to be defined. Loading history...
146		return cast(float, max(d_mat[i, j], p_mat[i, j], q_mat[i, j]))
147	1
148		def sim(self, src: str, tar: str) -> float:
149		"""Return the normalized Gotoh score of two strings.
150
151		Parameters
152	1	----------
153	1	src : str
154		Source string for comparison
155	1	tar : str
156		Target string for comparison
157
158		Returns
159		-------
160		float
161		Normalized Gotoh score
162
163		Examples
164		--------
165		>>> cmp = Gotoh()
166		>>> cmp.sim('cat', 'hat')
167		0.6666666666666667
168		>>> cmp.sim('Niall', 'Neil')
169		0.22360679774997896
170		>>> round(cmp.sim('aluminum', 'Catalan'), 12)
171		0.0
172		>>> cmp.sim('cat', 'hat')
173		0.6666666666666667
174
175
176		.. versionadded:: 0.4.1
177
178		"""
179		if src == tar:
180		return 1.0
181		return max(0.0, self.sim_score(src, tar)) / (
182		self.sim_score(src, src) ** 0.5 * self.sim_score(tar, tar) ** 0.5
183		)
184
185
186	1	if __name__ == '__main__':
187	1	import doctest
188	1
189		doctest.testmod()
190

chrislit / abydos

Issues (140)

abydos/distance/_gotoh.py (1 issue)

Labels

Severity

Introduced By

Duplication Side-by-Side

Filter issues like